示例#1
0
    def getPage(self, baseUrl, addParams=None, post_data=None):
        """Fetch ``baseUrl`` and retry through the challenge-bypass paths if needed.

        The first attempt goes through ``self.cm.getPage``.  When the returned
        body contains the ``'!![]+!![]'`` marker (presumably the Cloudflare
        JavaScript challenge -- the fallback helper is named
        ``getPageCFProtection``), the request is repeated with
        ``cRequestHandler`` and the cookies stored under
        ``'www_dpstream_top'`` are copied into ``self.COOKIE_FILE``.  If any
        step of that retry raises, ``self.cm.getPageCFProtection`` is used
        instead.

        Side effect: ``self.cookieHeader`` is set to the stored cookie string
        when the ``cRequestHandler`` retry is taken.

        Args:
            baseUrl: URL to fetch.
            addParams: optional dict forwarded to ``self.cm``; it is copied,
                so the caller's dict is never modified.
            post_data: optional indexable pair; element 0 is handed to
                ``cRequestHandler.addParametersLine`` and element 1 to
                ``self.cm.getPage`` / ``getPageCFProtection``.

        Returns:
            The ``(sts, data)`` pair of the last attempt that ran.
        """
        printDBG('etap1')
        # Work on a private copy: the fallback below adds a
        # 'cloudflare_params' entry that must not leak into the caller's dict
        # (or into a shared default).
        addParams = dict(addParams) if addParams else {}
        # Remember whether the caller really supplied post data *before*
        # normalising it, otherwise every retry would be sent as a POST.
        has_post = bool(post_data)
        if not has_post:
            post_data = (None, None)
        sts, data = self.cm.getPage(baseUrl, addParams, post_data[1])
        if not data:
            data = ''
        if '!![]+!![]' in data:
            try:
                printDBG('etap2')
                oRequestHandler = cRequestHandler(baseUrl)
                if has_post:
                    oRequestHandler.setRequestType(
                        cRequestHandler.REQUEST_TYPE_POST)
                    oRequestHandler.addParametersLine(post_data[0])
                data = oRequestHandler.request()
                sts = True
                cook = GestionCookie().Readcookie('www_dpstream_top')
                self.cookieHeader = str(cook)
                cj = self.cm.getCookie(self.COOKIE_FILE)
                cookieDomain = '.' + self.cm.getBaseUrl(baseUrl, True)
                # Imported cookies are kept for 48 hours.
                cookieExpires = time.time() + 3600 * 48
                # Always split: a single 'name=value' string must be handled
                # as one cookie, not iterated character by character.
                for item in cook.split(';'):
                    item = item.strip()
                    if '=' not in item:
                        continue
                    # Split on the first '=' only; values may contain '='.
                    cookieKey, cookieValue = item.split('=', 1)
                    cookieItem = cookielib.Cookie(
                        version=0,
                        name=cookieKey,
                        value=cookieValue,
                        port=None,
                        port_specified=False,
                        domain=cookieDomain,
                        domain_specified=True,
                        domain_initial_dot=True,
                        path='/',
                        path_specified=True,
                        secure=False,
                        expires=cookieExpires,
                        discard=True,
                        comment=None,
                        comment_url=None,
                        rest={'HttpOnly': None},
                        rfc2109=False)
                    cj.set_cookie(cookieItem)
                # The cookies are flagged discard=True, so they are only
                # written when ignore_discard is set.
                cj.save(self.COOKIE_FILE, ignore_discard=True)

                printDBG('ffffff' + self.cookieHeader)
            except Exception as e:
                # Best effort: any failure above switches to the second
                # bypass implementation instead of aborting the fetch.
                printDBG('ERREUR:' + str(e))
                addParams['cloudflare_params'] = {
                    'domain': self.up.getDomain(baseUrl),
                    'cookie_file': self.COOKIE_FILE,
                    'User-Agent': self.USER_AGENT
                }
                sts, data = self.cm.getPageCFProtection(
                    baseUrl, addParams, post_data[1])
        return sts, data
示例#2
0
    def GetHeadercookie(self, url):
        """Build the ``|``-prefixed header suffix carrying the stored cookies.

        The host part of ``url`` (dots replaced by underscores) is the name
        under which the cookies are looked up through ``GestionCookie``.

        Returns:
            ``''`` when the lookup yields an empty string, otherwise ``'|'``
            followed by the URL-encoded ``User-Agent`` and ``Cookie`` pairs.
        """
        host = re.sub(r'https*:\/\/([^/]+)(\/*.*)', '\\1', url)
        cookie_name = host.replace('.', '_')
        stored = GestionCookie().Readcookie(cookie_name)
        if stored != '':
            encoded = urllib.urlencode({'User-Agent': UA, 'Cookie': stored})
            return '|' + encoded
        return ''
示例#3
0
	def getPage1(self, baseUrl, addParams=None, post_data=None):
		"""Fetch ``baseUrl`` and retry through the challenge-bypass paths if needed.

		The first attempt goes through ``self.cm.getPage``.  When the body
		contains the ``'!![]+!![]'`` marker or the response metadata reports
		status 503, ``self.COOKIE_FILE`` is removed, the request is repeated
		with ``cRequestHandler`` and the cookies stored for the URL's domain
		are copied into ``self.COOKIE_FILE``.  If any step of that retry
		raises, ``self.cm.getPageCFProtection`` is used instead.

		Args:
			baseUrl: URL to fetch.
			addParams: optional dict forwarded to ``self.cm``; a copy of
				``self.defaultParams`` is used when it is empty or omitted.
				The caller's dict is never modified.
			post_data: optional dict of form fields.

		Returns:
			The ``(sts, data)`` pair of the last attempt that ran.
		"""
		# Work on a private copy: the fallback below adds a
		# 'cloudflare_params' entry that must not leak into the caller's dict.
		addParams = dict(addParams) if addParams else dict(self.defaultParams)
		sts, data = self.cm.getPage(baseUrl, addParams, post_data)
		if not data:
			data = strwithmeta('', {})
		# A plain string response carries no ``meta`` attribute; treat that as
		# "no metadata" instead of raising AttributeError.
		meta = getattr(data, 'meta', {})
		printDBG('ddddaaattttaaaa' + str(meta))
		if ('!![]+!![]' in data) or (meta.get('status_code', 0) == 503):
			try:
				if os.path.exists(self.COOKIE_FILE):
					os.remove(self.COOKIE_FILE)
					printDBG('cookie removed')
				printDBG('Start CLoudflare  Vstream methode')
				oRequestHandler = cRequestHandler(baseUrl)
				if post_data:
					# NOTE(review): values are joined as-is, without
					# percent-encoding, exactly like before -- confirm that
					# addParametersLine expects a raw line.
					post_data_vstream = '&'.join(
						'%s=%s' % (key, post_data[key]) for key in post_data)
					oRequestHandler.setRequestType(cRequestHandler.REQUEST_TYPE_POST)
					oRequestHandler.addParametersLine(post_data_vstream)
				data = oRequestHandler.request()
				sts = True
				cookieName = self.up.getDomain(baseUrl).replace('.', '_')
				printDBG('cook_vstream_file=' + cookieName)
				cook = GestionCookie().Readcookie(cookieName)
				printDBG('cook_vstream=' + cook)
				cj = self.cm.getCookie(self.COOKIE_FILE)
				cookieDomain = '.' + self.cm.getBaseUrl(baseUrl, True)
				# Imported cookies are kept for 48 hours.
				cookieExpires = time.time() + 3600 * 48
				# Always split: a single 'name=value' string must be handled
				# as one cookie, not iterated character by character.
				for item in cook.split(';'):
					item = item.strip()
					if '=' not in item:
						continue
					printDBG('item=' + item)
					# Split on the first '=' only; values may contain '='.
					cookieKey, cookieValue = item.split('=', 1)
					cookieItem = cookielib.Cookie(
						version=0,
						name=cookieKey,
						value=cookieValue,
						port=None,
						port_specified=False,
						domain=cookieDomain,
						domain_specified=True,
						domain_initial_dot=True,
						path='/',
						path_specified=True,
						secure=False,
						expires=cookieExpires,
						discard=True,
						comment=None,
						comment_url=None,
						rest={'HttpOnly': None},
						rfc2109=False)
					cj.set_cookie(cookieItem)
				# The cookies are flagged discard=True, so they are only
				# written when ignore_discard is set.
				cj.save(self.COOKIE_FILE, ignore_discard=True)
			except Exception as e:
				# Best effort: any failure above switches to the second
				# bypass implementation instead of aborting the fetch.
				printDBG('ERREUR:' + str(e))
				printDBG('Start CLoudflare  E2iplayer methode')
				addParams['cloudflare_params'] = {'domain': self.up.getDomain(baseUrl), 'cookie_file': self.COOKIE_FILE, 'User-Agent': self.USER_AGENT}
				sts, data = self.cm.getPageCFProtection(baseUrl, addParams, post_data)
		return sts, data
示例#4
0
    def GetHtml(self, url, htmlcontent='', cookies='', postdata=None, Gived_headers=''):
        """Fetch ``url`` through a cloudscraper session and persist its cookies.

        The inputs are remembered on the instance (``Memorised_Headers``,
        ``Memorised_PostData``, ``Memorised_Cookies``), cookies previously
        saved for the host are added to the request, and the cookies returned
        by the server are saved again under the host name (dots replaced by
        underscores).  On a falsy response the saved cookies for the host are
        deleted instead.

        Args:
            url: address to fetch.
            htmlcontent: unused; kept for interface compatibility.
            cookies: cookie string to send.
            postdata: optional ``'key=value&key2=value2'`` string; when given
                the request is sent as POST with those fields.
            Gived_headers: optional dict of headers; a ``'Cookie'`` entry is
                merged into ``Memorised_Cookies``.

        Returns:
            The UTF-8 encoded response text, or ``''`` when the response is
            falsy.
        """
        # Remember the caller's inputs on the instance.
        self.Memorised_Headers = Gived_headers
        self.Memorised_PostData = postdata
        self.Memorised_Cookies = cookies

        # A cookie passed inside the headers is merged with the explicit
        # ones.  The truthiness test also tolerates Gived_headers=None.
        header_cookie = Gived_headers.get('Cookie', None) if Gived_headers else None
        if header_cookie:
            if cookies:
                self.Memorised_Cookies = cookies + '; ' + header_cookie
            else:
                self.Memorised_Cookies = header_cookie

        self.hostComplet = re.sub(r'(https*:\/\/[^/]+)(\/*.*)', '\\1', url)
        self.host = re.sub(r'https*:\/\/', '', self.hostComplet)
        self.url = url
        cookie_name = self.host.replace('.', '_')

        # Add the cookies saved by an earlier call for this host.
        cookieMem = GestionCookie().Readcookie(cookie_name)
        if cookieMem != '':
            if not self.Memorised_Cookies:
                cookies = cookieMem
            else:
                cookies = self.Memorised_Cookies + '; ' + cookieMem

        data = {}
        if postdata:
            method = 'POST'
            # Convert the 'k=v&k2=v2' line into a dict.  partition() keeps
            # any '=' inside a value and tolerates a pair without '='.
            for pair in postdata.split('&'):
                if not pair:
                    continue
                key, _, value = pair.partition('=')
                data[key] = value
        else:
            method = 'GET'

        s = cloudscraper.create_scraper(browser={'custom': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0'})
        try:
            r = s.request(method, url, headers=self.SetHeader(), cookies=self.ParseCookies(cookies), data=data)
            MemCookie = r.cookies.get_dict()

            if r:
                sContent = r.text.encode('utf-8')
                self.RedirectionUrl = r.url
                self.Header = r.headers
            else:
                # Failed request: drop the saved cookies for this host so the
                # next call starts from a clean state.
                sContent = ''
                MemCookie = {}
                GestionCookie().DeleteCookie(cookie_name)
        finally:
            # The scraper is a requests session; close it so its pooled
            # connections are released.
            s.close()

        # Persist the cookies returned by the server as 'name=value;' pairs.
        if MemCookie:
            c = ''.join(name + '=' + value + ';' for name, value in MemCookie.items())
            GestionCookie().SaveCookie(cookie_name, c)

        return sContent
示例#5
0
    def GetHtml(self,
                url,
                htmlcontent='',
                cookies='',
                postdata=None,
                Gived_headers=''):
        """Fetch ``url`` through ``CloudflareScraper`` and persist its cookies.

        The inputs are remembered on the instance (``Memorised_Headers``,
        ``Memorised_PostData``, ``Memorised_Cookies``), cookies previously
        saved for the host are added to the request, and the scraper's
        ``MemCookie`` mapping is saved again under the host name (dots
        replaced by underscores).  On a falsy response the saved cookies for
        the host are deleted instead.

        Args:
            url: address to fetch.
            htmlcontent: unused; kept for interface compatibility.
            cookies: cookie string to send.
            postdata: optional ``'key=value&key2=value2'`` string; when given
                the request is sent as POST with those fields.
            Gived_headers: optional dict of headers; a ``'Cookie'`` entry is
                merged into ``Memorised_Cookies``.

        Returns:
            The UTF-8 encoded response text, or ``''`` when the response is
            falsy.
        """
        # Remember the caller's inputs on the instance.
        self.Memorised_Headers = Gived_headers
        self.Memorised_PostData = postdata
        self.Memorised_Cookies = cookies

        # A cookie passed inside the headers is merged with the explicit
        # ones.  The truthiness test also tolerates Gived_headers=None.
        header_cookie = Gived_headers.get('Cookie', None) if Gived_headers else None
        if header_cookie:
            if cookies:
                self.Memorised_Cookies = cookies + '; ' + header_cookie
            else:
                self.Memorised_Cookies = header_cookie

        self.hostComplet = re.sub(r'(https*:\/\/[^/]+)(\/*.*)', '\\1', url)
        self.host = re.sub(r'https*:\/\/', '', self.hostComplet)
        self.url = url
        cookie_name = self.host.replace('.', '_')

        # Add the cookies saved by an earlier call for this host.
        cookieMem = GestionCookie().Readcookie(cookie_name)
        if cookieMem != '':
            if not self.Memorised_Cookies:
                cookies = cookieMem
            else:
                cookies = self.Memorised_Cookies + '; ' + cookieMem

        data = {}
        if postdata:
            method = 'POST'
            # Convert the 'k=v&k2=v2' line into a dict.  partition() keeps
            # any '=' inside a value and tolerates a pair without '='.
            for pair in postdata.split('&'):
                if not pair:
                    continue
                key, _, value = pair.partition('=')
                data[key] = value
        else:
            method = 'GET'

        s = CloudflareScraper()

        r = s.request(method,
                      url,
                      headers=self.SetHeader(),
                      cookies=self.ParseCookies(cookies),
                      data=data)
        if r:
            sContent = r.text.encode("utf-8")
        else:
            # Failed request: forget the scraper's cookies and drop the saved
            # ones for this host so the next call starts from a clean state.
            sContent = ''
            s.MemCookie = ''
            GestionCookie().DeleteCookie(cookie_name)

        # Persist the cookies collected by the scraper as 'name=value;' pairs.
        cookie = s.MemCookie
        if cookie:
            c = ''.join(name + '=' + cookie[name] + ';' for name in cookie)
            GestionCookie().SaveCookie(cookie_name, c)

        return sContent