示例#1
0
    def getSkuId(self, url):

        headers = {'User-Agent': self.userAgent}
        r = requests.get(url, headers=headers)

        if 200 != r.status_code:
            print 'Unable to get long URL for "', skuid, '" with an error (', r.status_code, '):\n', r.text
            return None

        url = getMatchString(r.content, r"hrl='(.*?)'")
        #print 'Long url:', url

        headers = {'User-Agent': self.userAgent}
        r = requests.get(url, headers=headers)

        if 200 != r.status_code:
            print 'Unable to get information page for "', skuid, '" with an error (', r.status_code, '):\n', r.text
            return None

        data = getMatchString(r.content, r'window._itemOnly = (.*?);')
        #print data

        obj = json.loads(data.decode('utf-8', 'ignore'))
        obj = obj.pop('item')

        skuId = obj.pop('areaSkuId')

        return skuId
示例#2
0
def getSlogan(skuid):
    """Extract the slogan text from the mobile product page of *skuid*.

    ``Network.saveGetUrl`` is called with the data path and the page URL
    (presumably it stores the page at that path -- confirm against Network);
    the file at that path is then read and searched for the ``prod-act``
    block.

    Returns:
        The matched text as ``unicode``, or None when ``saveGetUrl`` reports
        a negative result or nothing matches.
    """

    htmlPath = OutputPath.getDataPath(skuid, 'html')
    pageUrl = 'http://item.m.jd.com/product/{}.html'.format(skuid)

    if Network.saveGetUrl(htmlPath, pageUrl) < 0:
        return None

    with open(htmlPath) as page:
        html = page.read()

    # Capture everything up to the next tag opening after the div.
    text = getMatchString(html, r'<div class="prod-act">(.*?)<')

    if text is None or isinstance(text, unicode):
        return text

    # NOTE(review): no encoding is given, so the interpreter default is used
    # and undecodable bytes are dropped -- confirm this is intended.
    return unicode(text, errors='ignore')
示例#3
0
文件: qwd.py 项目: wean/coupon
    def getShareUrl(self, skuid):
        """Return the sharing URL for *skuid*, trying up to three times.

        Each attempt logs in (``plogin`` when ``self.loginType`` equals 1,
        ``login`` otherwise), requests ``self.shareUrl`` formatted with the
        SKU id and parses the JSON wrapped in ``itemshare(...)``.

        An attempt is abandoned and the next one started when the login
        fails, the request raises, the status is not 200, the payload cannot
        be located, or the service answers with ``retCode`` 1000 (not logged
        in). Any other non-zero ``retCode`` ends the call immediately.

        Args:
            skuid: the SKU id inserted into ``self.shareUrl``.

        Returns:
            The ``skuurl`` value on success, otherwise None.
        """

        url = self.shareUrl.format(skuid)

        headers = {'User-Agent': self.userAgent}

        for retries in range(3):

            # Compare integers by value; identity only works by accident of
            # the interpreter's small-integer caching.
            if retries != 0:

                # A previous attempt failed: reset and wait before retrying.
                self.reset()
                time.sleep(1)

            if self.loginType == 1:

                if not self.plogin(retries):
                    continue

                cookies = self.pCookies

            else:
                if not self.login():
                    continue

                cookies = self.cookies

            try:
                r = requests.get(url, cookies=cookies, headers=headers)
            except Exception as e:
                print 'Unable to get sharing URL for "', skuid, '" with an error:\n', e
                continue

            if 200 != r.status_code:
                print 'Unable to get sharing URL for "', skuid, '" with an error (', r.status_code, '):\n', r.text
                continue

            # Drop line breaks so the non-greedy pattern can span the payload.
            content = r.content.replace('\n', '')
            data = getMatchString(content, r'itemshare\((.*?)\)')

            if data is None:
                # No payload to parse; treat it like any other failed attempt.
                print 'Unable to get sharing URL for "', skuid, '" with an unexpected response:\n', r.text
                continue

            obj = json.loads(data.decode('utf-8', 'ignore'))
            retCode = int(obj.pop('retCode'))

            if retCode == 0:
                return obj.pop('skuurl')

            elif 1000 == retCode: # Unlogin
                print 'Unlogin to get sharing URL for "', skuid, '" with an error (', retCode, '):\n', r.text
                continue

            print 'Unable to get sharing URL for "', skuid, '" with an error (', retCode, '):\n', r.text
            return None

        # Every attempt failed: reset before giving up.
        self.reset()

        return None
示例#4
0
    def getSlogan(self):
        """Extract the slogan text from the product page of ``self.skuid``.

        ``Network.saveGetUrl`` is called with the data path and the page URL
        (presumably it stores the page at that path -- confirm against
        Network); the file at that path is then read and searched with
        ``Infor.SLOGAN_PATTERN``.

        Returns:
            The matched text as ``unicode``, or None when ``saveGetUrl``
            reports a negative result or nothing matches.
        """

        htmlPath = OutputPath.getDataPath(self.skuid, 'html')
        pageUrl = Infor.SKU_MAIN_URL_TEMPLATE.format(self.skuid)

        if Network.saveGetUrl(htmlPath, pageUrl) < 0:
            return None

        with open(htmlPath) as page:
            html = page.read()

        text = getMatchString(html, Infor.SLOGAN_PATTERN)

        if text is None or isinstance(text, unicode):
            return text

        # NOTE(review): no encoding is given, so the interpreter default is
        # used and undecodable bytes are dropped -- confirm this is intended.
        return unicode(text, errors='ignore')
示例#5
0
    def getShareUrl(self, skuid):
        """Return the sharing URL for *skuid* using a single request.

        Logs in first (``plogin`` when ``self.loginMethod`` equals 1,
        ``login`` otherwise), requests ``self.shareUrl`` formatted with the
        SKU id and parses the JSON wrapped in ``itemshare(...)``.

        Args:
            skuid: the SKU id inserted into ``self.shareUrl``.

        Returns:
            The ``skuurl`` value when ``retCode`` is 0; None when the request
            raises, the status is not 200, the payload cannot be located, or
            ``retCode`` is non-zero.
        """

        # Compare integers by value; identity only works by accident of the
        # interpreter's small-integer caching.
        if self.loginMethod == 1:
            self.plogin()
            cookies = self.pCookies
        else:
            self.login()
            cookies = self.cookies

        url = self.shareUrl.format(skuid)

        headers = {'User-Agent': self.userAgent}

        try:
            r = requests.get(url, cookies=cookies, headers=headers)
        except Exception as e:
            print 'Unable to get sharing URL for "', skuid, '" with an error:\n', e
            return None

        if 200 != r.status_code:
            print 'Unable to get sharing URL for "', skuid, '" with an error (', r.status_code, '):\n', r.text
            return None

        # Drop line breaks so the non-greedy pattern can span the payload.
        content = r.content.replace('\n', '')
        data = getMatchString(content, r'itemshare\((.*?)\)')

        if data is None:
            # No payload to parse; fail like the other error paths do.
            print 'Unable to get sharing URL for "', skuid, '" with an unexpected response:\n', r.text
            return None

        obj = json.loads(data.decode('utf-8', 'ignore'))
        retCode = int(obj.pop('retCode'))

        if retCode != 0:
            # Report the service's own code; the HTTP status is 200 here.
            print 'Unable to get sharing URL for "', skuid, '" with an error (', retCode, '):\n', r.text

            # XXX: Reset and let this call fail to keep the logic simple. The
            #      next call logs in again.
            self.reset()

            return None

        return obj.pop('skuurl')
示例#6
0
    def getImage(self):
        """Extract an image URL from the product page of ``self.skuid``.

        ``Network.saveGetUrl`` is called with the data path and the page URL
        (presumably it stores the page at that path -- confirm against
        Network). The file at that path is read, the first region matching
        ``Infor.IMAGE_PATTERN`` is located, and ``Infor.MARK_PATTERN`` is
        applied to that region.

        Returns:
            The result of ``getMatchString`` on the matched region, or None
            when ``saveGetUrl`` reports a negative result or the image
            pattern does not match.
        """

        htmlPath = OutputPath.getDataPath(self.skuid, 'html')
        pageUrl = Infor.SKU_MAIN_URL_TEMPLATE.format(self.skuid)

        if Network.saveGetUrl(htmlPath, pageUrl) < 0:
            return None

        with open(htmlPath) as page:
            html = page.read()

        found = re.search(Infor.IMAGE_PATTERN, html)

        if found is None:
            return None

        # Narrow the search to the matched region only.
        return getMatchString(found.group(0), Infor.MARK_PATTERN)
示例#7
0
    def parseImpl(self, content):
        """Extract a ``#...#`` token from *content*.

        When *content* holds at least two ``#`` characters, the span from the
        first to the last one (both included) is returned. Otherwise each
        pattern in ``self.rules`` is tried in order, and the first non-blank
        match is returned wrapped in ``#``.

        Args:
            content: the text to search.

        Returns:
            The token including its surrounding ``#`` marks, or None when
            neither the marks nor any rule yields a value.
        """

        start = content.find('#')
        end = content.rfind('#')

        # Two distinct marks are required. Positions are compared by value:
        # an identity test on integers fails for large indexes, which made a
        # lone '#' far into the text look like a pair.
        if 0 <= start < end:
            return content[start:end + 1]

        for rule in self.rules:
            value = getMatchString(content, rule)

            if value is None:
                continue

            value = value.strip()
            if value:
                return '#{}#'.format(value)

        return None