示例#1
0
    def sendSites(self):
        """Run the fetch routine that matches ``self.endPoint`` and return ``self.data``.

        A falsy ``endPoint`` uses the eflash fetch; ``'xml'`` runs the atom or
        product-JSON fetch (atom when any entry of ``self.atom`` occurs in
        ``self.site``) followed by the sitemap fetch; ``'atom'`` and ``'json'``
        run their single fetch.  Each of those is followed by
        ``self.threadPool(self.listDict)``.  ``'json2'`` calls
        ``self.getProductJson2Data()`` without arguments and skips the
        thread-pool step.

        Returns:
            ``self.data``, or ``None`` when ``endPoint`` matches none of the
            above or when an exception was caught and printed.
        """
        try:
            mode = self.endPoint

            if not mode:
                self.getEflashData()
            elif mode == 'xml':
                prefers_atom = any(marker in self.site
                                   for marker in self.atom)
                if prefers_atom:
                    self.getAtomData()
                else:
                    self.getProductJsonData()
                self.getSitemapData()
            elif mode == 'atom':
                self.getAtomData()
            elif mode == 'json':
                self.getProductJsonData()
            elif mode == 'json2':
                # This mode returns straight away, without the thread-pool step.
                self.getProductJson2Data()
                return self.data
            else:
                return None

            # Shared tail of every mode except 'json2'.
            self.threadPool(self.listDict)
            return self.data
        except Exception as err:
            print(u.getDatetime(), self.site, err)
            return None
示例#2
0
    def getEflashData(self):
        """Fetch the site's page and queue a lookup for every product link not seen before.

        The page is requested through the proxy ``self.p`` with a random
        ``_`` query value (presumably to defeat caching -- confirm).  The
        parsed tree is first passed to ``self.checkEflashIfOos``.  Every
        ``div.grid-view-item`` whose link is not yet a key of ``self.data`` is
        recorded with ``updated=None`` and handed to ``self.getVendorAndStock``
        on a worker thread; ``self.callback`` receives the finished future.

        Timeouts and connection errors are ignored.  An HTTP 430 response marks
        the current proxy as bad via ``self.badP``.  Anything else is printed.
        """
        try:
            page_url = self.site + f'?_={uuid4().hex}'
            resp = self.s.get(page_url,
                              proxies={'https': 'http://{}'.format(self.p)},
                              timeout=(1, 5))
            resp.raise_for_status()

            tree = etree.HTML(resp.content)
            self.checkEflashIfOos(tree)

            for tile in tree.xpath('//div[@class="grid-view-item"]'):
                href = self.site + tile.xpath('a/@href')[0]
                if href in self.data:
                    continue

                # Record the link immediately so it is queued only once.
                self.data[href] = {'href': href, 'updated': None}
                job = {'href': href, 'updated': None, 'oldStockCount': 'NEW'}
                # A separate executor is created for each submitted lookup.
                future = cf.ThreadPoolExecutor().submit(
                    self.getVendorAndStock, job, random.choice(self.infoP))
                future.add_done_callback(self.callback)
        except (requests.Timeout, requests.ConnectionError):
            pass
        except requests.HTTPError:
            if resp.status_code == 430:
                self.badP = self.p
        except Exception as err:
            print(u.getDatetime(), self.site, err)
示例#3
0
    def getSitemapData(self):
        """Poll the product sitemap and queue a stock lookup for each new or changed URL.

        Fetches ``sitemap_products_1.xml`` through the proxy ``self.p``, skips
        the first child of the root element, and for every remaining entry
        compares its second child (stored as ``updated``) with the value cached
        in ``self.data``.  New or newer entries are handed to
        ``self.getVendorAndStock`` on a worker thread; ``self.callback``
        receives the finished future.

        An entry is skipped when its title (``child[3][1]``) contains one of
        ``self.keywords`` and none of ``self.ignoreKW``.  Entries whose title
        cannot be read are processed anyway.

        Timeouts and connection errors are ignored.  HTTP 429 on a site whose
        URL contains ``'kith'`` calls ``self.getKith429()``; HTTP 430 marks the
        proxy as bad via ``self.badP``.  Anything else is printed.

        Returns:
            None.  Results are recorded in ``self.data``.
        """
        try:
            r = self.s.get(self.site +
                           f'sitemap_products_1.xml?_={uuid4().hex}',
                           proxies={'https': 'http://{}'.format(self.p)},
                           timeout=(1, 5))
            r.raise_for_status()

            # NOTE(review): the XML comes from a remote server; the standard
            # library parser is documented as unsafe against maliciously
            # constructed data.
            tree = ET.fromstring(r.content)

            for child in tree[1:]:
                try:
                    title = child[3][1].text.lower()
                    if any(keyword in title
                           for keyword in self.keywords) and not any(
                               keyword in title
                               for keyword in self.ignoreKW):
                        continue
                except (IndexError, AttributeError, TypeError):
                    # Missing or empty title node: fall through and process
                    # the entry unfiltered.
                    pass

                href = child[0].text
                updated = child[1].text

                if href in self.data:
                    if not updated > self.data[href]['updated']:
                        continue
                    self.data[href]['updated'] = updated
                    known = self.data[href]
                    if 'vendor' in known and 'stockCount' in known:
                        key = {
                            'href': href,
                            'updated': updated,
                            'vendor': known['vendor'],
                            'oldStockCount': known['stockCount']
                        }
                    else:
                        # The first lookup has not filled in vendor/stock
                        # (still running, or it failed): queue it like a new
                        # entry instead of raising KeyError and abandoning
                        # the rest of the sitemap.
                        key = {
                            'href': href,
                            'updated': updated,
                            'oldStockCount': 'NEW'
                        }
                else:
                    self.data[href] = {'href': href, 'updated': updated}
                    key = {
                        'href': href,
                        'updated': updated,
                        'oldStockCount': 'NEW'
                    }

                # A separate executor is created for each submitted lookup.
                cf.ThreadPoolExecutor().submit(
                    self.getVendorAndStock, key,
                    random.choice(self.infoP)).add_done_callback(
                        self.callback)
        except (requests.Timeout, requests.ConnectionError):
            # Deliberately ignored: nothing is recorded for this poll.
            pass
        except requests.HTTPError:
            if r.status_code == 429 and 'kith' in self.site:
                self.getKith429()
            elif r.status_code == 430:
                self.badP = self.p
        except Exception as e:
            print(u.getDatetime(), self.site, e)
示例#4
0
    def getProductJson2Data(self, page):
        """Poll one page of ``products.json`` and track availability changes in ``self.data``.

        Args:
            page: Value placed in the ``page`` query parameter.

        For each listed product (skipping titles that contain one of
        ``self.keywords`` and none of ``self.ignoreKW``) the availability is
        ``'IN STOCK/HIDDEN'`` when any variant is available, otherwise
        ``'OOS/HIDDEN'``.  ``self.sendToSlack`` is submitted to a worker thread
        when a cached product goes from out of stock to in stock, or when a
        product is first seen with an ``updated_at`` date not earlier than
        ``u.getDate()``.  In both cases the vendor must contain one of
        ``self.brands`` or be shorter than three characters.

        Timeouts and connection errors are ignored.  HTTP 430 marks the proxy
        as bad via ``self.badP``.  Anything else is printed.
        """
        try:
            endpoint = self.site + f'products.json?page={str(page)}&_={uuid4().hex}'
            resp = self.s.get(endpoint,
                              proxies={'https': 'http://{}'.format(self.p)},
                              timeout=(1, 1))
            resp.raise_for_status()

            for item in resp.json()['products']:
                title = item['title']
                matched = any(word in title.lower() for word in self.keywords)
                if matched and not any(word in title.lower()
                                       for word in self.ignoreKW):
                    continue

                href = '{}products/{}'.format(self.site, item['handle'])
                updated = item['updated_at']
                vendor = item['vendor'].lower()
                sellable = [
                    variant['available'] for variant in item['variants']
                    if variant['available']
                ]
                stockCount = 'IN STOCK/HIDDEN' if sellable else 'OOS/HIDDEN'

                if href not in self.data:
                    # First sighting: cache it, then alert only for watched
                    # vendors whose product was updated recently enough.
                    self.data[href] = {
                        'href': href,
                        'updated': updated,
                        'vendor': vendor,
                        'stockCount': stockCount
                    }
                    watched = any(brand in vendor
                                  for brand in self.brands) or len(vendor) < 3
                    if watched and updated.split('T')[0] >= u.getDate():
                        cf.ThreadPoolExecutor().submit(self.sendToSlack, href,
                                                       updated, item,
                                                       stockCount, vendor)
                    continue

                if not updated > self.data[href]['updated']:
                    continue

                self.data[href]['updated'] = updated
                restocked = (self.data[href]['stockCount'] == 'OOS/HIDDEN'
                             and stockCount == 'IN STOCK/HIDDEN')
                if restocked and (any(brand in vendor
                                      for brand in self.brands)
                                  or len(vendor) < 3):
                    cf.ThreadPoolExecutor().submit(self.sendToSlack, href,
                                                   updated, item, stockCount,
                                                   vendor)
                self.data[href]['stockCount'] = stockCount
        except (requests.Timeout, requests.ConnectionError) as err:
            pass
        except requests.HTTPError:
            if resp.status_code == 430:
                self.badP = self.p
        except Exception as err:
            print(u.getDatetime(), self.site, err)
示例#5
0
    def getProductJsonData(self):
        """Poll ``products.json`` and queue a stock lookup for each new or changed product.

        Products whose title contains one of ``self.keywords`` and none of
        ``self.ignoreKW`` are skipped.  For the rest, ``updated_at`` is compared
        with the value cached in ``self.data``; new or newer products are handed
        to ``self.getVendorAndStock`` on a worker thread, and ``self.callback``
        receives the finished future.

        Timeouts and connection errors are ignored.  HTTP 430 marks the proxy
        as bad via ``self.badP``.  Anything else is printed.

        Returns:
            None.  Results are recorded in ``self.data``.
        """
        try:
            r = self.s.get(self.site + f'products.json?_={uuid4().hex}',
                           proxies={'https': 'http://{}'.format(self.p)},
                           timeout=(1, 1))
            r.raise_for_status()

            for products in r.json()['products']:
                title = products['title']
                if any(keyword in title.lower()
                       for keyword in self.keywords) and not any(
                           keyword in title.lower()
                           for keyword in self.ignoreKW):
                    continue

                href = '{}products/{}'.format(self.site, products['handle'])
                updated = products['updated_at']

                if href in self.data:
                    if not updated > self.data[href]['updated']:
                        continue
                    self.data[href]['updated'] = updated
                    known = self.data[href]
                    if 'vendor' in known and 'stockCount' in known:
                        key = {
                            'href': href,
                            'updated': updated,
                            'vendor': known['vendor'],
                            'oldStockCount': known['stockCount']
                        }
                    else:
                        # The first lookup has not filled in vendor/stock
                        # (still running, or it failed): queue it like a new
                        # product instead of raising KeyError and abandoning
                        # the rest of the listing.
                        key = {
                            'href': href,
                            'updated': updated,
                            'oldStockCount': 'NEW'
                        }
                else:
                    self.data[href] = {'href': href, 'updated': updated}
                    key = {
                        'href': href,
                        'updated': updated,
                        'oldStockCount': 'NEW'
                    }

                # A separate executor is created for each submitted lookup.
                cf.ThreadPoolExecutor().submit(
                    self.getVendorAndStock, key,
                    random.choice(self.infoP)).add_done_callback(
                        self.callback)
        except (requests.Timeout, requests.ConnectionError):
            # Deliberately ignored: nothing is recorded for this poll.
            pass
        except requests.HTTPError:
            if r.status_code == 430:
                self.badP = self.p
        except Exception as e:
            print(u.getDatetime(), self.site, e)
示例#6
0
    def run(self):
        """Poll forever using the routine that matches ``self.endPoint``.

        A falsy ``endPoint`` selects ``runEflash``; ``'xml'``, ``'atom'``,
        ``'json'`` and ``'json2'`` select ``runSitemap``, ``runAtom``,
        ``runJson`` and ``runJson2``.  Each loop iteration calls the selected
        routine followed by ``self.misc()``; any ``Exception`` is printed and
        the loop continues.

        Raises:
            ValueError: If ``endPoint`` is none of the supported values.
                Previously this case left the routine unbound, and the loop
                printed the resulting error endlessly without doing any work.
        """
        if not self.endPoint:
            runner_name = 'runEflash'
        else:
            runner_name = {
                'xml': 'runSitemap',
                'atom': 'runAtom',
                'json': 'runJson',
                'json2': 'runJson2',
            }.get(self.endPoint)

        if runner_name is None:
            raise ValueError(f'Unsupported endPoint: {self.endPoint!r}')

        # Resolved by name so only the selected routine has to exist.
        methodToRun = getattr(self, runner_name)

        while True:
            try:
                methodToRun()
                self.misc()
            except Exception as e:
                print(u.getDatetime(), self.site, e)
示例#7
0
    # Start-up sequence: load the site list, seed the initial data for every
    # site, then start one monitor process per site.
    proxies = u.proxies()
    # Shuffled so the site-to-proxy pairing below differs between runs.
    random.shuffle(proxies)

    with open('sites.json') as sitemaps_json:
        start = time.time()
        print("Attempting to initialize sitemap data...")
        sitemaps = json.load(sitemaps_json)
        sitemaps_length = len(sitemaps['sitemaps'])
        print(str(sitemaps_length) + " sitemap(s) detected.")
        # One slot per site; a slot stays 0 if that site's initialisation fails.
        data = [0 for x in range(sitemaps_length)]

    # Seed each site, pairing site i with proxy i.
    # NOTE(review): proxies[i] raises IndexError when there are fewer proxies
    # than sites; the handler below prints it and moves on to the next site.
    for i in range(sitemaps_length):
        try:
            data[i] = Initial(sitemaps['sitemaps'][i], proxies[i]).sendSites()
            print(u.getDatetime(), i,
                  'Initialized {}'.format(sitemaps['sitemaps'][i]['sitemap']),
                  len(data[i]))
            # print(data[i])
        except Exception as e:
            print(e)
    print("Sitemap data initialized.")
    # Elapsed seconds since the site list was opened.
    print(time.time() - start)

    try:

        # Start one process per site, named after the site's 'name' entry,
        # running ShopifyMonitor.run on the data seeded above.
        # NOTE(review): Process is presumably multiprocessing.Process, and the
        # handler for this try block lies outside this excerpt -- confirm.
        for i in range(sitemaps_length):
            Process(target=ShopifyMonitor(data[i],
                                          sitemaps['sitemaps'][i]).run,
                    name=sitemaps['sitemaps'][i]['name']).start()
示例#8
0
 def callback(self, x):
     """Store a finished lookup's result in ``self.data`` under its ``'href'``.

     Args:
         x: Completed future whose ``result()`` is a dict with an ``'href'``
             key.

     Any ``Exception`` (a failed lookup, or a result that is not such a dict)
     is printed together with its detail and the cache is left untouched.
     Interpreter-level signals such as ``KeyboardInterrupt`` are no longer
     swallowed.
     """
     try:
         # Fetch the result once; it is both the cache key source and value.
         outcome = x.result()
         self.data[outcome['href']] = outcome
     except Exception as e:
         print(u.getDatetime(), self.site, 'CF callback exception.', e)
示例#9
0
    def getHiddenStockSites(self, key, oldStockCount, p):
        """Read a product page's embedded JSON and report its availability.

        Args:
            key: Dict with at least ``'href'`` and ``'updated'``.
            oldStockCount: Previously recorded availability, or ``'NEW'``.
            p: Proxy address used for the request.

        Returns:
            A dict with ``'href'``, ``'updated'``, ``'vendor'`` and
            ``'stockCount'`` (``'IN STOCK/HIDDEN'`` or ``'OOS/HIDDEN'``).  On
            failure ``'vendor'`` is ``'error'`` and ``'stockCount'`` is
            ``'OOS/HIDDEN'`` for HTTP 404, otherwise the old value
            (``'OOS/HIDDEN'`` when the old value was ``'NEW'``).  HTTP 430
            retries through a proxy picked from ``u.getInfoProxy()``.

        ``self.sendToSlack`` is called when the product went from out of stock
        to in stock, or when it is new, its vendor contains one of
        ``self.brands`` or is shorter than three characters, and its update
        date is not earlier than ``u.getDate()``.
        """

        def unresolved(stock):
            # Placeholder record returned whenever the page could not be read.
            return {
                'href': key['href'],
                'updated': key['updated'],
                'vendor': 'error',
                'stockCount': stock
            }

        try:
            sites = self.s.get(key['href'] + f'?_={uuid4().hex}',
                               proxies={'https': 'http://{}'.format(p)},
                               timeout=5)
            sites.raise_for_status()

            payload = [
                text for text in sites.text.split('\n') if '{"id"' in text
            ]
            if 'kith.com' in self.site:
                # Third matching line, minus its final character.
                payload = payload[2][:-1]
            # NOTE(review): for any other site the list itself reaches
            # json.loads, which raises TypeError and ends in the generic
            # fallback below.
            product = json.loads(payload)
            vendor = product['vendor'].lower()
            if product['available']:
                stockCount = 'IN STOCK/HIDDEN'
            else:
                stockCount = 'OOS/HIDDEN'

            if oldStockCount == 'OOS/HIDDEN' and stockCount == 'IN STOCK/HIDDEN':
                self.sendToSlack(key['href'], key['updated'], product,
                                 stockCount, vendor)
            elif oldStockCount == 'NEW':
                watched = any(brand in vendor
                              for brand in self.brands) or len(vendor) < 3
                if watched and key['updated'].split('T')[0] >= u.getDate():
                    self.sendToSlack(key['href'], key['updated'], product,
                                     stockCount, vendor)

            return {
                'href': key['href'],
                'updated': key['updated'],
                'vendor': vendor,
                'stockCount': stockCount
            }

        except requests.HTTPError as err:
            print(u.getDatetime(), err)
            if sites.status_code == 430:
                return self.getHiddenStockSites(
                    key, oldStockCount, random.choice(u.getInfoProxy()))
            if sites.status_code == 404:
                return unresolved('OOS/HIDDEN')
            return unresolved(
                'OOS/HIDDEN' if oldStockCount == 'NEW' else oldStockCount)
        except IndexError as err:
            print(u.getDatetime(), key['href'], err)
            return unresolved(
                'OOS/HIDDEN' if oldStockCount == 'NEW' else oldStockCount)
        except Exception as err:
            print(u.getDatetime(), p, err)
            return unresolved(
                'OOS/HIDDEN' if oldStockCount == 'NEW' else oldStockCount)
示例#10
0
    def getHiddenSites(self, key, oldStockCount, p):
        """Read a product page's embedded JSON and report its total stock.

        Args:
            key: Dict with at least ``'href'`` and ``'updated'``.
            oldStockCount: Previously recorded stock count, or ``'NEW'``.
            p: Proxy address used for the request.

        Returns:
            A dict with ``'href'``, ``'updated'``, ``'vendor'`` and
            ``'stockCount'`` (the sum of all variant ``inventory_quantity``
            values that are at least 1).  On failure ``'vendor'`` is
            ``'error'`` and ``'stockCount'`` is 0 for HTTP 404, otherwise the
            old value (0 when the old value was ``'NEW'``).  HTTP 430 retries
            through a proxy picked from ``u.getInfoProxy()`` and returns that
            retry's result.

        ``self.sendToSlack`` is called when stock went from 0 to at least 1, or
        when the product is new and either ``self.endPoint`` is falsy or the
        vendor contains one of ``self.brands`` / is shorter than three
        characters and the update date is not earlier than ``u.getDate()``.
        """

        def unresolved(stock):
            # Placeholder record returned whenever the page could not be read.
            return {
                'href': key['href'],
                'updated': key['updated'],
                'vendor': 'error',
                'stockCount': stock
            }

        try:
            sites = self.s.get(key['href'] + f'?_={uuid4().hex}',
                               proxies={'https': 'http://{}'.format(p)},
                               timeout=5)
            sites.raise_for_status()

            # Candidate lines: these two sites embed the product JSON
            # HTML-escaped, every other site embeds it as plain JSON.
            if self.site in [
                    'https://lessoneseven.com/',
                    'https://www.thegoodlifespace.com/'
            ]:
                r = [
                    line for line in sites.text.split('\n') if '{&quot' in line
                ]
            else:
                r = [
                    line for line in sites.text.split('\n') if '{"id"' in line
                ]

            # Site-specific extraction of the JSON text from the candidates.
            # NOTE(review): for a site matching none of these branches the
            # list itself reaches json.loads, which raises TypeError and ends
            # in the generic fallback below.
            if 'trophyroomstore.com' in self.site:
                r = r[4].split(' = ')[1][:-1]

            elif 'thedarksideinitiative.com' in self.site:
                r = r[3].split(' = ')[1].replace(';</script>', '')

            elif 'featuresneakerboutique.com' in self.site:
                r = r[2].replace('product: ', '').strip()[:-1]
                if ' = ' in r:
                    r = r.split(' = ')[1]

            elif 'notre-shop.com' in self.site or 'alifenewyork.com' in self.site:
                r = r[2].replace('product: ', '').strip()[:-1]

            elif 'blendsus.com' in self.site:
                r = r[2].split('product: ')[1].replace(', onVariantSelected:',
                                                       '')

            elif 'octobersveryown.com' in self.site:
                r = r[2].split(' = ')[1].strip()[:-1]

            elif 'undefeated.com' in self.site:
                r = r[2].split('product = ')[1].strip()[:-1]

            elif 'xhibition.co' in self.site:
                r = r[3]

            elif 'hanon-shop.com' in self.site:
                r = r[2].split('{ product: ')[1].split(', onV')[0]

            elif 'thegoodlifespace.com' in self.site:
                r = r[0].split('="')[1].split('"')[0].replace('&quot;', '"')

            elif 'lessoneseven.com' in sites.url:
                r = r[0].split('"')[1].replace('&quot;', '"')

            elif 'doverstreetmarket.com' in self.site or 'deadstock.ca' in self.site or 'stashedsf.com' in self.site or\
                    'worldofhombre.com' in self.site:
                r = r[2]

            r = json.loads(r)
            vendor = r['vendor'].lower()
            stockCount = sum(variant['inventory_quantity']
                             for variant in r['variants']
                             if variant['inventory_quantity'] >= 1)

            if oldStockCount == 0 and stockCount >= 1:
                self.sendToSlack(key['href'], key['updated'], r, stockCount,
                                 vendor)

            elif oldStockCount == 'NEW':
                if not self.endPoint or (
                    (any(brand in vendor
                         for brand in self.brands) or len(vendor) < 3)
                        and key['updated'].split('T')[0] >= u.getDate()):
                    self.sendToSlack(key['href'], key['updated'], r,
                                     stockCount, vendor)

            return {
                'href': key['href'],
                'updated': key['updated'],
                'vendor': vendor,
                'stockCount': stockCount
            }

        except requests.HTTPError as e:
            print(u.getDatetime(), e)
            if sites.status_code == 430:
                # Return the retry's result; without the return it was
                # discarded and the error placeholder was handed back.
                return self.getHiddenSites(key, oldStockCount,
                                           random.choice(u.getInfoProxy()))
            if sites.status_code == 404:
                return unresolved(0)
            return unresolved(0 if oldStockCount == 'NEW' else oldStockCount)
        except IndexError as e:
            print(u.getDatetime(), key['href'], e)
            return unresolved(0 if oldStockCount == 'NEW' else oldStockCount)
        except Exception as e:
            print(u.getDatetime(), p, e)
            return unresolved(0 if oldStockCount == 'NEW' else oldStockCount)
示例#11
0
    def getProdInfo(self, key, oldStockCount, p):
        """Fetch a product's ``.json`` document and report its total stock.

        Args:
            key: Dict with at least ``'href'`` and ``'updated'``.
            oldStockCount: Previously recorded stock count, ``'HIDDEN'``, or
                ``'NEW'``.
            p: Proxy address used for the request.

        Returns:
            A dict with ``'href'``, ``'updated'``, ``'vendor'`` and
            ``'stockCount'``.  ``'stockCount'`` is the sum of all variant
            ``inventory_quantity`` values that are at least 1, or ``'HIDDEN'``
            when the quantities cannot be read.  On failure ``'vendor'`` is
            ``'error'`` and ``'stockCount'`` is 0 for HTTP 404, otherwise the
            old value (0 when the old value was ``'NEW'``).  HTTP 430 retries
            through a proxy picked from ``u.getInfoProxy()``.

        ``self.sendToSlack`` is called when stock went from 0 to at least 1,
        when the previous count was ``'HIDDEN'``, or when the product is new,
        its vendor contains one of ``self.brands`` or is shorter than three
        characters, and its update date is not earlier than ``u.getDate()``.
        """

        def unresolved(stock):
            # Placeholder record returned whenever the document could not be read.
            return {
                'href': key['href'],
                'updated': key['updated'],
                'vendor': 'error',
                'stockCount': stock
            }

        try:
            sites = self.s.get(key['href'] + f'.json?_={uuid4().hex}',
                               proxies={'https': 'http://{}'.format(p)},
                               timeout=5)
            sites.raise_for_status()

            # Parse the body once and reuse it below.
            payload = sites.json()
            product = payload['product']
            vendor = product['vendor'].lower()
            try:
                stockCount = sum(
                    variant['inventory_quantity']
                    for variant in product['variants']
                    if variant['inventory_quantity'] >= 1)
            except (KeyError, TypeError):
                # Quantity missing or not comparable with a number.
                stockCount = 'HIDDEN'

            # Only compare numerically when a numeric count is available;
            # 'HIDDEN' >= 1 would raise TypeError and the product would be
            # recorded as an error instead of as 'HIDDEN'.
            restocked = (oldStockCount == 0 and stockCount != 'HIDDEN'
                         and stockCount >= 1)

            if restocked or oldStockCount == 'HIDDEN':
                self.sendToSlack(key['href'], key['updated'], payload,
                                 stockCount, vendor)
            elif oldStockCount == 'NEW':
                if (any(brand in vendor
                        for brand in self.brands) or len(vendor) < 3
                    ) and key['updated'].split('T')[0] >= u.getDate():
                    self.sendToSlack(key['href'], key['updated'], payload,
                                     stockCount, vendor)

            return {
                'href': key['href'],
                'updated': key['updated'],
                'vendor': vendor,
                'stockCount': stockCount
            }

        except requests.HTTPError as e:
            print(u.getDatetime(), e)
            if sites.status_code == 430:
                return self.getProdInfo(key, oldStockCount,
                                        random.choice(u.getInfoProxy()))
            if sites.status_code == 404:
                return unresolved(0)
            return unresolved(0 if oldStockCount == 'NEW' else oldStockCount)
        except IndexError as e:
            print(u.getDatetime(), key['href'], e)
            return unresolved(0 if oldStockCount == 'NEW' else oldStockCount)
        except Exception as e:
            print(u.getDatetime(), p, e)
            return unresolved(0 if oldStockCount == 'NEW' else oldStockCount)