def get_eleven():
    '''
    Get "eleven" from oceanball

    The oceanball script is downloaded, unpacked in a throwaway V8 context,
    patched so that its callback assigns to a global named ``eleven``, and
    then executed against a stubbed browser environment.

    Return
    ------
    eleven: string
        parameter to get detail info
    '''
    import json  # local import: only used to decode the JSON.stringify output

    oceanball, cas = get_oceanball()
    packed = requests.get(oceanball).content.decode('utf8')
    # Replace eval(...) by JSON.stringify(...) so that the packer returns its
    # payload as a quoted string instead of executing it.
    packed = packed.replace('eval', 'JSON.stringify')
    with PyV8.JSContext() as unpack_ctx:
        quoted = unpack_ctx.eval(packed)
    # SECURITY: the original ran Python's eval() on this remote-controlled
    # string just to unquote it.  JSON.stringify produces JSON, so json.loads
    # does the same unquoting without executing anything.
    ocean = json.loads(quoted)
    ocean = ocean.replace(cas, 'eleven=' + cas)

    with PyV8.JSContext() as ctxt:
        # Minimal fake browser globals that the script reads.
        ctxt.eval(
            'var hotel_id = "433176"; '
            'var site = {}; '
            'site.getUserAgent = function(){}; '
            'var Image = function(){}; '
            'var window = {}; '
            'window.document = {body:{innerHTML:"1"}, '
            'documentElement:{attributes:{webdriver:"1"}}, '
            'createElement:function(x){return {innerHTML:"1"}}}; '
            'var document = window.document;'
            'window.navigator = {"appCodeName":"Mozilla", "appName":"Netscape", '
            '"language":"zh-CN", "platform":"Win"}; '
            'window.navigator.userAgent = site.getUserAgent(); '
            'var navigator = window.navigator; '
            'window.location = {}; '
            'window.location.href = "http://hotels.ctrip.com/hotel/"+hotel_id+".html"; '
            'var location = window.location;'
        )
        # ctxt.eval('var div = {innerHTML:"1"};')
        ctxt.eval(
            'var navigator = {userAgent:{indexOf: function(x){return "1"}}, geolocation:"1"}'
        )
        # The callback named by `cas` simply invokes its argument.
        ctxt.eval('var %s = function(x){return x()}' % cas)
        ctxt.eval(ocean)
        eleven = ctxt.locals.eleven
    return eleven
def __init__(self, pure=False, context=None):
    """Create and enter the JS context.

    When neither ``pure`` nor ``context`` is given, a default global object
    exposing ``XMLHttpRequest`` and a fresh ``Console`` is used.  Unless
    ``pure`` is set, the ``COMPAT`` script is evaluated afterwards.
    """
    if not (pure or context):
        context = {'XMLHttpRequest': XMLHttpRequest, 'console': Console()}

    self.context = PyV8.JSContext(context) if context else PyV8.JSContext()
    self.context.enter()

    if pure:
        return
    self.eval(COMPAT)
def __init__(self):
    """Create the JS context and run ``self.init()`` inside it.

    Nothing happens when ``HAS_PYV8`` is false.  The context is entered only
    for the duration of ``self.init()`` and is left again even if that call
    raises.
    """
    if HAS_PYV8:
        with PyV8.JSLocker():
            self.ctx = PyV8.JSContext(GlobalContext())
            self.ctx.enter()
            try:
                self.init()
            finally:
                # Previously skipped when init() raised, leaving the context
                # entered.
                self.ctx.leave()
def js(self):
    "Returns JS context, creating and populating it on first use"
    if not self._ctx:
        newline = u'\n' if self._use_unicode else '\n'
        sources = []
        for name in self._core_files:
            sources.append(self.read_js_file(make_path(name)))

        self._ctx = PyV8.JSContext()
        self._ctx.enter()
        self._ctx.eval(newline.join(sources))

        js_globals = self._ctx.locals

        # load default snippets
        snippets = self.read_js_file(make_path('snippets.json'))
        js_globals.pyLoadSystemSnippets(snippets)

        # expose some methods
        js_globals.log = js_log
        js_globals.pyFile = File()

        # user-contributed globals, if any
        if self._contrib:
            for key in self._contrib:
                self._ctx.locals[key] = self._contrib[key]

    if self._should_load_extension:
        self._ctx.locals.pyResetUserData()
        self.load_extensions()
        self._should_load_extension = False

    return self._ctx
def get_tk(self):
    """Compute the ``tk`` token for ``self.content``.

    The value from ``self.get_TKK()`` is unescaped, evaluated as JavaScript,
    and passed together with ``self.content`` to the JS ``tk`` function
    defined below.  The token is printed and returned.
    """
    with PyV8.JSContext() as ctxt:
        # Restore the '\x' escape sequences
        decode = codecs.getdecoder("unicode_escape")
        tkk_source = decode(self.get_TKK())[0]
        tkk_value = ctxt.eval(tkk_source)
        ctxt.eval("""
            var b = function (a, b) {
                for (var d = 0; d < b.length - 2; d += 3) {
                    var c = b.charAt(d + 2),
                        c = "a" <= c ? c.charCodeAt(0) - 87 : Number(c),
                        c = "+" == b.charAt(d + 1) ? a >>> c : a << c;
                    a = "+" == b.charAt(d) ? a + c & 4294967295 : a ^ c
                }
                return a
            }

            var tk = function (a,TKK) {
                for (var e = TKK.split("."), h = Number(e[0]) || 0, g = [], d = 0, f = 0; f < a.length; f++) {
                    var c = a.charCodeAt(f);
                    128 > c ? g[d++] = c : (2048 > c ? g[d++] = c >> 6 | 192 : (55296 == (c & 64512) && f + 1 < a.length && 56320 == (a.charCodeAt(f + 1) & 64512) ? (c = 65536 + ((c & 1023) << 10) + (a.charCodeAt(++f) & 1023), g[d++] = c >> 18 | 240, g[d++] = c >> 12 & 63 | 128) : g[d++] = c >> 12 | 224, g[d++] = c >> 6 & 63 | 128), g[d++] = c & 63 | 128)
                }
                a = h;
                for (d = 0; d < g.length; d++) a += g[d], a = b(a, "+-a^+6");
                a = b(a, "+-3^+b+-f");
                a ^= Number(e[1]) || 0;
                0 > a && (a = (a & 2147483647) + 2147483648);
                a %= 1E6;
                return a.toString() + "." + (a ^ h)
            }
        """)
        js_tk = ctxt.locals.tk
        tk = js_tk(self.content, tkk_value)
        print(tk)
        return tk
def analyse(js, tree):
    """
    Main function called from pdfrankenstein. Analyzes javascript in order to
    deobfuscate the code.

    :param js: String of code to analyze
    :param tree: Tree xml object to use as reference for objects called from
        the code.
    :return: String of deobfuscated code; '' when PyV8 is unavailable, when
        analysis raises, or when the evaluation loop returns None
    """
    if not PyV8:
        return ''

    with PyV8.JSIsolate():
        context = PyV8.JSContext()
        context.enter()
        try:
            # Replace eval() by a function that only records its argument in
            # the JS global `evalCode`.
            context.eval('evalCode = \'\';')
            context.eval(
                'evalOverride = function (expression) { evalCode += expression; return;}'
            )
            context.eval('eval=evalOverride')
            try:
                if tree is not None:
                    create_objs(context, tree)
                ret = eval_loop(js, context)
            except Exception:
                # return 'Error with analyzing JS: ' + e.message
                return ''
        finally:
            # Single exit point: the context is left on every path,
            # including a failure in the setup evals above.
            context.leave()
        return '' if ret is None else ret
def imitate_cookie(self, r_1):
    """Run the challenge script of response ``r_1`` and store its cookie.

    The first ``<script>`` of the page is unpacked in V8, browser-only
    statements are stripped from the unpacked code, and the resulting
    ``name=value`` string is set on ``self.session.cookies``.
    """
    print('**********************************************imitate_cookie')
    soup = BeautifulSoup(r_1.text, 'lxml')
    script1 = soup.select('script')[0].text
    # Return the generated source instead of eval()-ing it.
    script2 = ("(function(){" + script1.replace('eval(y', 'return (y') +
               "})").encode('utf-8')

    # `with` leaves the context again; the original entered it and never left.
    with PyV8.JSContext() as ctxt:
        script3 = ctxt.eval(script2)()
        # Remove the anti-headless loop, the DOM event wiring and the
        # DOM-based origin lookup, none of which can run outside a browser.
        script4 = script3.replace("while(window._phantom||window.__phantomas){};", "") \
            .replace("if((function(){try{return !!window.addEventListener;}catch(e){return false;}})())"
                     "{document.addEventListener('DOMContentLoaded',l,false);}else{document.attachEvent('onreadystatechange',l);}", '') \
            .replace(r"var h=document.createElement('div');h.innerHTML='<a href=\'/\'>x</a>';h=h.firstChild.href;",
                     "var h='http://www.gsxt.gov.cn/';")
        # Drop the timer and make the script return the cookie string
        # instead of assigning document.cookie.
        script5 = re.sub("document.cookie=.+\\);", 'return dc;',
                         re.sub("setTimeout[^;]+;", '', script4)) + "return l();"
        script6 = "(function(){" + script5 + "})"
        cookie = ctxt.eval(script6)()

    # Split on the first '=' only, so a value containing '=' no longer
    # produces extra positional arguments.
    self.session.cookies.set(*cookie.split('=', 1))
def wapple(self, id, url):
    """Fetch ``url``, run the Wappalyzer JS on it and store detected apps.

    One row per detected application is inserted through ``self.cur`` using
    the ``feature_insert`` template, then ``self.con`` is committed.

    :param id: identifier written with every inserted row
    :param url: page to analyse
    """
    host = urlparse(url).hostname
    response = requests.get(url)
    data = {
        'host': host,
        'url': url,
        'html': response.text,
        'headers': dict(response.headers),
    }
    apps = json.dumps(self.apps)
    categories = json.dumps(self.categories)

    # Files and the JS context are released even when evaluation fails.
    with PyV8.JSContext() as ctxt:
        for script in ('js/wappalyzer.js', 'js/driver.js'):
            with open(os.path.join(self.file_dir, script)) as handle:
                ctxt.eval(handle.read())
        results = ctxt.eval("w.apps = %s; w.categories = %s; w.driver.data = %s; w.driver.init();"
                            % (apps, categories, json.dumps(data)))

    answers = json.loads(results)
    print("{0}: {1} - {2}".format(id, url, len(answers)))
    for app, thing in answers.items():
        # The original assigned `c + ","` in a loop, which kept only the
        # last category; join them all instead.
        app_categories = ",".join(thing["categories"])
        version = thing["version"]
        # NOTE(review): the statement is built with str.format from page
        # derived values; switch `feature_insert` to a parameterized query.
        self.cur.execute(
            feature_insert.format(id, app, app_categories, version)
        )
    self.con.commit()
def get_javascript_tests_result(self, tests_lines=None, json_obj=None):
    """
    Summary:
        Run the assertions of a test case written in JavaScript.

    Args:
        tests_lines: test statements, separated by semicolons; defaults to
            ``self.get_case_tests()``
        json_obj: value exposed to the script as ``responseBody``; defaults
            to ``self.get_json_response_obj()``

    :return: dict copied from the script's ``tests`` object, or None when no
        JSON object is available
    """
    json_obj = json_obj or self.get_json_response_obj()
    if json_obj is None:
        return None

    tests_lines = tests_lines or self.get_case_tests()
    tests_lines = tests_lines.encode('utf8') if isinstance(
        tests_lines, unicode) else tests_lines

    js_str = """
    (function(jsbody){
        var responseBody = jsbody;
        var tests = new Object();""" + tests_lines + """
        return tests
    })
    """

    test_dic = dict()
    with PyV8.JSLocker():
        js_context = PyV8.JSContext()
        js_context.enter()
        try:
            test_func = js_context.eval(js_str.decode('utf8'))
            test_jsobj = test_func(json_obj)
            for key in test_jsobj.keys():
                test_dic[key] = test_jsobj[key]
        finally:
            # User-written test code can raise; leave the context anyway.
            js_context.leave()
    return test_dic
def jsHandle(js):
    """Evaluate a JS redirect snippet and return the target it assigns.

    The first 31 and last 9 characters of ``js`` are dropped, the rest is
    run with empty ``location`` and ``window`` objects, and the value that
    ends up in ``location`` (or in its first property) is returned.
    """
    import PyV8

    s = js[31:]
    s = s[:(len(s) - 9)]
    s = 'var location={};var window={};' + s
    pos = s.rfind('location')

    def first_property(location):
        # location.href=<target> or location[href]=<target>: the target is
        # stored under the first key.  getattr replaces the original
        # eval('location.' + name), which executed a name taken from
        # untrusted JS as Python source.
        return getattr(location, location.keys()[0])

    # `with` leaves the context again; the original never left it.
    with PyV8.JSContext() as ctxt:
        try:
            ctxt.eval(s)
            location = ctxt.eval('location')
            if s[pos + 8] == '=':
                # location=<target>: location itself is the redirect target
                return location
            return first_property(location)
        except Exception:
            # location[...](<target>) or location.href(<target>): insert
            # '=' before the '(' so the call becomes an assignment.
            pos = s.find('(', pos)
            s = s[:pos] + '=' + s[pos:]
            ctxt.eval(s)
            return first_property(ctxt.eval('location'))
def run(scriptfile, opts):
    """
    Execute javascript, setup cherrypy and route url paths to
    registered callbacks.

    :param scriptfile: path of the JavaScript file to execute; its directory
        (and its ``.d-mods/`` subdirectory) is prepended to
        ``require.RequirePath``
    :param opts: mapping that may contain ``loglevel`` (default ``DEBUG``)
        and ``logfile`` (default ``/dev/stdout``)
    :return: an error message string when the log level is unknown,
        otherwise None
    """
    levelname = opts.get('loglevel', 'DEBUG').upper()
    if not hasattr(logging, levelname):
        return "Unknown log level %s, must be DEBUG|INFO|WARN" % levelname
    # The keyword was misspelled `flename`, so the configured log file was
    # never used (and Python 3 rejects the unknown keyword).
    logging.basicConfig(filename=opts.get('logfile', '/dev/stdout'),
                        level=getattr(logging, levelname),
                        format="%(message)s")

    basedir = os.path.dirname(os.path.abspath(scriptfile))
    require.RequirePath = [basedir + "/", basedir + "/.d-mods/"] + \
        require.RequirePath

    with open(scriptfile) as handle:
        code = handle.read()

    api = RootAPI()
    root_context = PyV8.JSContext(api)
    root_context.enter()
    try:
        _init_jscontext(root_context, scriptfile)
        try:
            root_context.eval(code)
        except KeyboardInterrupt:
            # Ctrl-C ends the script quietly.
            pass
    finally:
        root_context.leave()
def analyze(self):
    """Fetch ``self.url`` and run the Wappalyzer JS on the response.

    :return: whatever ``w.driver.init()`` evaluates to in the JS context
    """
    host = urlparse(self.url).hostname
    # NOTE(review): verify=False disables TLS certificate verification.
    response = requests.get(self.url, verify=False)
    data = {
        'host': host,
        'url': self.url,
        'html': response.text,
        'headers': dict(response.headers)
    }
    apps = json.dumps(self.apps)
    categories = json.dumps(self.categories)

    # Both script files and the JS context are now released on every path.
    with PyV8.JSContext() as ctxt:
        for script in ('js/wappalyzer.js', '../php/js/driver.js'):
            with open(os.path.join(self.file_dir, script)) as handle:
                ctxt.eval(handle.read())
        return ctxt.eval(
            "w.apps = %s; w.categories = %s; w.driver.data = %s; w.driver.init();"
            % (apps, categories, json.dumps(data)))
def run(self):
    """Evaluate ``self.script`` and store ``self.result_expr`` in ``self.result``.

    Errors raised during evaluation are logged with a traceback and not
    re-raised; ``self.result`` is then left untouched.
    """
    with PyV8.JSContext() as ctx:
        try:
            ctx.eval(self.script)
            self.result = ctx.eval(self.result_expr)
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            logging.exception('JS error for script %s', self.script)
def solve_cf_challenge(self, resp, headers, **kwargs):
    """Solve the JavaScript challenge in ``resp`` and submit the answer.

    :param resp: response holding the challenge page (``url``, ``content``)
    :param headers: request headers; a copy gets ``Referer`` set
    :param kwargs: forwarded to ``requests.get`` (``params`` is dropped)
    :return: the response of the ``/cdn-cgi/l/chk_jschl`` request
    :raises IOError: when the page cannot be parsed
    """
    headers = headers.copy()
    url = resp.url
    parsed = urlparse(url)
    domain = parsed.netloc
    body = resp.content
    kwargs.pop("params", None)  # Don't pass on params

    # Extract the arithmetic operation
    vc_match = re.search(r'name="jschl_vc" value="(\w+)"', body)
    js_match = None
    if vc_match is not None:
        js_match = re.search(r"setTimeout.+?\r?\n([\s\S]+?a\.value =.+?)\r?\n", body)
    if vc_match is None or js_match is None:
        # Something is wrong with the page. This may indicate Cloudflare has changed their
        # anti-bot technique. If you see this and are running the latest version,
        # please open a GitHub issue so I can update the code accordingly.
        raise IOError("Unable to parse Cloudflare anti-bots page. Try upgrading cfscrape, or "
                      "submit a bug report if you are running the latest version.")

    challenge = vc_match.group(1)
    expression = js_match.group(1)
    expression = re.sub(r"a\.value =(.+?) \+ .+?;", r"\1", expression)
    expression = re.sub(r"\s{3,}[a-z](?: = |\.).+", "", expression)

    # Lock must be added explicitly, because PyV8 bypasses the GIL
    with PyV8.JSLocker():
        with PyV8.JSContext() as ctxt:
            # Safely evaluate the Javascript expression
            solved = int(ctxt.eval(expression))
            answer = str(solved + len(domain))

    headers["Referer"] = url
    submit_url = "%s://%s/cdn-cgi/l/chk_jschl" % (parsed.scheme, domain)
    query = {"jschl_vc": challenge, "jschl_answer": answer}
    return requests.get(submit_url, params=query, headers=headers, **kwargs)
def __init__(self):
    """Set up site metadata, XPath rules, headers, a JS context and start URLs."""
    self.baseUrl = ''
    self.error_count = 0
    self.source_name = '天津市公共资源交易网'
    self.addr_id = '408'
    self.category = category

    # XPath expressions for the listing page
    self.xpath_rule = dict([
        ('list_page', '//div[@class="article-content"]/ul/li'),
        ('title_rule', './div/a//text()'),
        ('url_rule', './div/a/@href'),
        ('web_time_rule', './div/div/text()'),
        # 'content_rule' : r'<div class="content" id="zoom">(.*?)<!-- bottom begin-->'
    ])

    self.headers = dict([
        ('Host', 'ggzy.xzsp.tj.gov.cn'),
        ('Connection', 'keep-alive'),
        ('Referer', 'http://ggzy.xzsp.tj.gov.cn/jyxxgcjs/index.jhtml'),
    ])

    # JS context kept entered for the lifetime of the object
    js_context = PyV8.JSContext()
    self.execute = js_context
    js_context.enter()

    # JS source read from utils/ase_encryption.js
    with open('utils/ase_encryption.js') as script_file:
        self.a = script_file.read()

    # Each entry: (label, paginated URL template, number)
    # NOTE(review): the third element looks like a page count -- confirm.
    self.start_urls = [
        # Government procurement: 1767 pages, daily updates span 2 pages;
        # announcements, corrections and results are all listed together
        ('招标公告', 'http://ggzy.xzsp.tj.gov.cn/jyxxzfcg/index_{}.jhtml', 3),
        # Construction projects: 9016, daily updates span 5 pages;
        # announcements, corrections and results are all listed together
        ('招标公告', 'http://ggzy.xzsp.tj.gov.cn/jyxxgcjs/index_{}.jhtml', 3),
        # Medical procurement: 38, daily updates span 1 page;
        # announcements, corrections and results are all listed together
        ('招标公告', 'http://ggzy.xzsp.tj.gov.cn/jyxxyy/index_{}.jhtml', 3),
    ]
def download(path, chapter='', pagenum=0):
    """Download one page image into ``path`` as ``NNN.jpg``.

    :param path: target directory (created if missing)
    :param chapter: chapter identifier passed to ``url_gen``/``extract_id``
    :param pagenum: page number, also used for the zero-padded file name
    :return: True if the file already exists or was written, False when the
        helper request is not 200 or the image request returns 404
    """
    file_path = os.path.join(path, '%03d.jpg' % pagenum)
    if os.path.isfile(file_path):
        return True

    url = url_gen(chapter, pagenum)
    myheaders = copy.copy(headers)
    myheaders['Referer'] = url
    # The query string read "language=1>k=6": the "&gt" of "&gtk=6" had been
    # turned into ">" by HTML entity decoding.
    fun = url + \
        'chapterfun.ashx?cid=%d&page=%d&key=&language=1&gtk=6' % (
            extract_id(chapter), pagenum)
    r1 = requests.get(fun, headers=myheaders)
    if r1.status_code != 200:
        return False

    # `with` already enters the context; the extra ctxt.enter() the original
    # issued here was never balanced by a leave().
    with PyV8.JSContext() as ctxt:
        unpacked = ctxt.eval(r1.text[4:])
        func2 = ctxt.eval(unpacked)
        # Stringify while the context is still entered.
        html = str(func2).split(',')[0]

    r = requests.get(html, headers=myheaders)
    if r.status_code == 404:
        print('Blocked')
        return False
    if not os.path.exists(path):
        os.mkdir(path)
    with open(file_path, 'wb') as f:
        f.write(r.content)
    return True
def executeJS(js_func_string, arg):
    """Evaluate ``js_func_string`` as a JS expression and call it with ``arg``.

    The source is wrapped in parentheses before evaluation.
    NOTE(review): the context is entered and never left; presumably so the
    returned value stays usable if it is a JS object -- confirm.
    """
    # Create a JSContext object and enter it
    context = PyV8.JSContext()
    context.enter()
    # Then eval the JS code to execute (or the source file containing it)
    wrapped_source = "({0})".format(js_func_string)
    js_callable = context.eval(wrapped_source)
    return js_callable(arg)
def test_kuaidaili():
    """Workaround for the cookie encryption used by kuaidaili.

    Fetches the proxy list page once, then evaluates the local ``hm.js``
    file in a V8 context.  Returns False when the page or the script is
    empty.

    @refer: https://zhuanlan.zhihu.com/p/25957793
    """
    kuai_url = "http://www.kuaidaili.com/proxylist/1/"
    # The first visit returns the dynamically encrypted JS
    kuai_first_html = get_kuaidaili_html(kuai_url)
    if not kuai_first_html:
        return False

    js_path = BASE_DIR + os.sep + 'hm.js'
    with open(js_path, 'r') as fd:
        js_data = fd.read()
    if not js_data:
        print('读取的hm.js文本为空')
        return False

    # Execute the JS code to obtain the cookie information.  The leftover
    # pdb.set_trace() breakpoint and the unused HTML wrapper were removed;
    # the context is now left again after evaluation.
    with PyV8.JSContext() as ctxt:
        ctxt.eval(js_data)
    print('Success execute javascript code')
def parse_page(self, response):
    """Yield one ``ProxyItem`` per proxy row of the listing page.

    The port of each row is given as JS expressions; they are evaluated in
    V8 after loading the variable definitions from the page's first script.
    Rows that cannot be parsed are skipped.
    """
    #from scrapy.shell import inspect_response
    #inspect_response(response, self)
    trs = response.xpath('//table[2]/tr[4]/td[1]/table/tr')
    _port_expr_values = response.xpath(
        '/html/body/script[1]/text()').extract_first()
    # The first three rows are skipped
    for tr in trs[3:]:
        item = ProxyItem()
        try:
            _ip = tr.xpath('td[1]/font[2]/text()').extract_first().encode()
            # Read the port script from the current row.  The original used
            # trs[3] here, so every item got the port of the same row.
            _port_expr = tr.xpath('td[1]/font/script/text()'
                                  ).extract_first().encode().split('"+')[1][:-1]
            _port_expr = _port_expr.split('+')
            with PyV8.JSContext() as env:
                env.eval(_port_expr_values)
                _port = ''.join([str(env.eval(expr)) for expr in _port_expr])
            _type = tr.xpath('td[3]/a/font/text()').extract_first()
            _id = hash(_ip + _port + _type + self.name)
            item['_id'] = _id
            item['source'] = self.name
            item['ip'] = _ip
            item['port'] = _port
            item['p_type'] = _type
        except Exception:
            # Best effort: skip malformed rows.  Narrowed from a bare
            # `except:`, which also caught GeneratorExit raised at `yield`.
            continue
        # Yield outside the try so closing the generator is never swallowed.
        yield item
def get_jsl_clearance(req, post_cookie):
    """Run the two-stage challenge script in ``req`` and record its cookie.

    Stage one unpacks the script via a fake ``document.write``; stage two
    runs the unpacked code with browser-only statements removed so that it
    writes the ``name=value`` cookie string.

    :param req: response whose text is the challenge ``<script>``
    :param post_cookie: dict updated in place with the cookie
    :return: ``post_cookie``
    :raises ValueError: if the script never calls ``document.write``
    """
    req.encoding = 'utf-8'
    script = req.text.strip().replace("<script>", "")
    script = script.replace("</script>", "")
    script = script.replace(";eval", ";document.write")
    script = script.replace("\x00", "")

    # Output of document.write() is collected per call.  The original used a
    # module-level `global result`, which could silently reuse the value of
    # a previous call.
    written = []

    class v8Doc(PyV8.JSClass):
        def write(self, s):
            written.append(s)

    class Global(PyV8.JSClass):
        def __init__(self):
            self.document = v8Doc()

    def last_written():
        if not written:
            raise ValueError('challenge script did not call document.write')
        return written[-1]

    # `with` leaves the context again; the original never left it.
    with PyV8.JSContext(Global()) as ctxt:
        ctxt.eval(script)
        script = last_written().replace(
            "while(window._phantom||window.__phantomas){};", "")
        script = script.replace(
            "setTimeout('location.href=location.href.replace(/[\?|&]captcha-challenge/,\\\'\\\')',1500);",
            "")
        script = script.replace(
            "if((function(){try{return !!window.addEventListener;}catch(e){return false;}})()){document.addEventListener('DOMContentLoaded',l,false);}else{document.attachEvent('onreadystatechange',l);}",
            "l();")
        # Write the cookie string instead of assigning document.cookie
        r = re.compile(r'document.cookie.*?\)\;')
        script = re.sub(r, 'document.write(dc)', script)
        del written[:]
        ctxt.eval(script)

    # Split on the first '=' only so a value containing '=' stays intact.
    name, value = last_written().split('=', 1)
    post_cookie[name] = value
    return post_cookie
def parse_go_url(nodes, name, values, item):
    """Extract the URL assigned between two ``ga(...)`` calls of a script node.

    The text of ``nodes[0]`` (minus its first four characters) is evaluated
    as the JS global ``code``; the URL is sliced out of the part between
    ``ga('send','pageview');`` and ``ga('send','event',``.

    Returns None when ``nodes`` is empty.
    """
    print('+++go url %s' % (nodes,))
    if not nodes or len(nodes) < 1:
        return

    # Bind the script text to the JS variable `code`
    jscode = u'code=' + nodes[0].text_content()[4:]
    url = None
    obj = Global()
    with pyv8_jslocker:
        with PyV8.JSContext() as ctx:
            ctx.eval(jscode)
            code = ctx.locals.code

    # Markers delimiting the statement that holds the URL:
    #   ga('send','pageview');   ...   ga('send','event',
    start = code.index("ga('send','pageview');")
    stop = code.index("ga('send','event',")
    segment = code[start:stop]
    # Skip '=' plus the opening quote; drop the trailing two characters
    url = segment[segment.index('=') + 2:-2]
    print('----go ---------- %s' % (url,))
    return url
def analyze_from_data(self, url, html, headers):
    """Run the Wappalyzer JS on an already fetched page.

    :param url: page URL (its hostname is passed to the script as ``host``)
    :param html: page body
    :param headers: response headers
    :return: the decoded JSON produced by ``w.driver.init()``
    """
    logger.debug('Analyzing: %s', url)
    apps = json.dumps(self.apps)
    categories = json.dumps(self.categories)
    data = {
        'host': urlparse(url).hostname,
        'url': url,
        'html': html,
        'headers': headers
    }

    # `with` leaves the context again; the original entered it on every
    # call and never left.
    with PyV8.JSContext() as ctxt:
        with open(settings.FILENAME_WAPPALIZER_JS) as f:
            ctxt.eval(f.read())
        with open(settings.FILENAME_DRIVER_JS) as f:
            ctxt.eval(f.read())
        return json.loads(ctxt.eval(
            "w.apps={apps}; w.categories={categories}; w.driver.data={data}; w.driver.init();".format(
                apps=apps, categories=categories, data=json.dumps(data)
            )
        ))
def execute(script):
    """Evaluate ``script`` in a context whose global object is a ``CommonJsEnv``.

    Returns the value of the evaluation.
    """
    logging.info("executing script...")
    global_object = CommonJsEnv()
    with PyV8.JSContext(global_object) as context:
        outcome = context.eval(script)
        return outcome
def parse_page(self, response):
    """Yield one ``ProxyItem`` per row of the ``proxylist`` table.

    Port numbers are JS expressions; they are evaluated in V8 after loading
    the definitions taken from the page's second script (first character
    dropped).
    """
    values_js = response.xpath(
        '//script[2]/text()').extract_first().encode()[1:]
    rows = response.xpath('//table[@id="proxylist"]/tr')
    # On the first listing page the leading row is skipped
    if response.url == 'http://nntime.com/proxy-ip-01.htm':
        rows = rows[1:]

    for row in rows:
        item = ProxyItem()
        _ip = row.xpath('td[2]/text()').extract_first().encode()
        script_text = row.xpath('td[2]//script/text()').extract_first().encode()
        port_exprs = script_text.split('":"')[1][1:-1].split('+')

        with PyV8.JSContext() as env:
            env.eval(values_js)
            digits = [str(env.eval(expr)) for expr in port_exprs]
        _port = ''.join(digits)

        _type = row.xpath('td[3]/text()').extract_first()
        item['_id'] = hash(_ip + _port + _type + self.name)
        item['source'] = self.name
        item['ip'] = _ip
        item['port'] = _port
        item['p_type'] = _type
        yield item
def getJSRunResult(self,html):
    """Unpack the p,a,c,k,e,d-style script in ``html`` and return its cookie.

    The packer function and its arguments are cut out of the page by fixed
    string offsets, re-assembled into a standalone ``test(p,a,c,k,e,d)``
    call, and run in V8.  The unpacked code is then executed with a stub
    ``t_d`` object that collects cookie fragments into ``dc``.

    :return: the first ``;``-separated part of the collected cookie string,
        e.g. ``__jsl_clearance=...``
    """
    html = self.getFirstHtmlContent(html)
    functionBegin = html.index('(function(a){eval(function(p,a,c,k,e,d)')
    functionEnd = html.index(');return p;}')
    # 39 == len('(function(a){eval(function(p,a,c,k,e,d)'); 12 == len(');return p;}')
    functionBody = 'function test(p,a,c,k,e,d)'+html[functionBegin+39:functionEnd+12]+';'
    params = html[functionEnd+12:]
    p = re.compile(r";',(\d+),(\d+),'\|")
    res = p.search(html).group()
    paramP = "var p = "+ params[1:params[1:].index(res)+3] +";"
    paramA = "var a = " + p.findall(html)[0][0]+";"
    paramC = "var c = " + p.findall(html)[0][1]+";"
    params = params.split(res[:-2])[1]
    paramKIndexEnd = params.index(',0,{}));})')
    paramK = "var k = "+ params[:paramKIndexEnd]+";"
    paramE = "var e = 0 ;"
    paramD = "var d = {};"
    newParam = functionBody+paramP+paramA+paramC+paramK+paramE+paramD +'test(p,a,c,k,e,d);'
    preRun = "var dc=\"\";var t_d={hello:\"world\",t_c:function(x){if(x===\"\")return;if(x.slice(-1)===\";\"){x=x+\" \";};if(x.slice(-2)!==\"; \"){x=x+\"; \";};dc=dc+x;}};"
    a2 = params.split('0,{}));})(')[1]
    paramA2 = "var a = "+a2[:a2.rindex(';document.cookie')-1]+";"

    # `with` leaves the context again; the original entered a new context on
    # every call and never left it.
    with PyV8.JSContext() as ctxt:
        func = ctxt.eval(newParam)
        # Drop the trailing setTimeout("lo... redirect from the unpacked code
        func = func[:func.rindex('setTimeout("lo')]
        allFunction = "(function(){" + preRun + paramA2 + func + "return dc })"
        func = ctxt.eval(allFunction)
        res = func().split(';')[0]#__jsl_clearance=1448948882.413|0|vMuEK%2F2YtIOLddxT5kiyniI0yDA%3D
    return res
def exec_(self, source):
    """Compile and run ``self._source`` followed by ``source`` in V8.

    Both pieces are wrapped in one immediately-invoked function.  Errors
    while compiling raise ``RuntimeError``; errors while running raise
    ``ProgramError``.  The result is passed through ``self.convert``.
    """
    wrapper = '''\
(function() {{
    {0};
    {1};
}})()'''
    program = wrapper.format(encode_unicode_codepoints(self._source),
                             encode_unicode_codepoints(source))
    program = str(program)

    import PyV8
    import contextlib

    #backward compatibility
    managers = contextlib.nested(PyV8.JSContext(), PyV8.JSEngine())
    with managers as (ctxt, engine):
        js_errors = (PyV8.JSError, IndexError, ReferenceError, SyntaxError,
                     TypeError)
        try:
            compiled = engine.compile(program)
        except js_errors as compile_error:
            raise RuntimeError(compile_error)

        try:
            outcome = compiled.run()
        except js_errors as run_error:
            raise ProgramError(run_error)

        return self.convert(outcome)
def render(app_js_path, component_name, component_data):
    """
    Use React's `renderComponentToString` function to render the given
    component using the given data.

    The data is serialized with ``ORMEncoder`` and the call is evaluated in
    a context created with the react, moment and application extensions.
    JavaScript errors are logged and re-raised.
    """
    def elapsed_ms(since):
        # Milliseconds elapsed since the timestamp `since`
        return (time.time() - since) * 1000

    make_js_bundle(app_js_path)  # compilation is a side effect...
    log.debug("Initializing JS context")

    began = time.time()
    serialized_data = json.dumps(component_data, cls=ORMEncoder)
    log.debug("TIME: Serialized component data in %.3f", elapsed_ms(began))

    script = '''
    // Force output to string
    React.renderComponentToString(%s(%s));
    ''' % (component_name, serialized_data)

    began = time.time()
    extensions = ["lib/react", "lib/moment", app_js_path]
    ctx = PyV8.JSContext(extensions=extensions)
    log.debug("TIME: Created context in %.3f", elapsed_ms(began))

    began = time.time()
    with ctx:
        try:
            result = ctx.eval(script)
        except Exception as e:
            log.error("Javascript error! %r", e)
            raise
    log.debug("TIME: Evaluated bundle in %.3f", elapsed_ms(began))
    return result
def context(self):
    """Return this object's JS context, creating it on first access.

    On creation, ``thug.js`` is evaluated, then ``storage.js`` for IE
    personalities with major version below 8, then every ``*.js`` file in
    the hooks folder in sorted order, followed by a V8 garbage collection.
    """
    #if not hasattr(self, '_context'):
    if '_context' not in self.__dict__:
        def read_script(path):
            # Close the file after reading; the original used
            # open(...).read() and never closed it.
            with open(path, 'r') as handle:
                return handle.read()

        self._context = PyV8.JSContext(self)
        with self._context as ctxt:
            thug_js = os.path.join(thug.__configuration_path__, 'scripts',
                                   "thug.js")
            ctxt.eval(read_script(thug_js))

            if log.ThugOpts.Personality.isIE(
            ) and log.ThugOpts.Personality.browserMajorVersion < 8:
                storage_js = os.path.join(thug.__configuration_path__,
                                          'scripts', "storage.js")
                ctxt.eval(read_script(storage_js))

            hooks_folder = os.path.join(thug.__configuration_path__, 'hooks')
            for hook in sorted(
                    h for h in os.listdir(hooks_folder) if h.endswith('.js')):
                ctxt.eval(read_script(os.path.join(hooks_folder, hook)))

            PyV8.JSEngine.collect()
    return self._context
def __init__(self):
    """Initialise both base classes and the debugger thread's state.

    The thread is named ``dbg`` and marked as a daemon; ``evalContext`` is a
    JS context whose global object is a ``Shell`` bound to this instance.
    """
    PyV8.JSDebugger.__init__(self)
    threading.Thread.__init__(self, name='dbg')

    self.daemon = True
    self.exitcode = None
    self.terminated = False

    shell = Shell(self)
    self.evalContext = PyV8.JSContext(shell)
def clscontent(alljs):
    """Evaluate ``alljs`` and return the value of its global ``rules``.

    Any error is logged with a traceback and None is returned instead.
    """
    try:
        ctx = PyV8.JSContext()
        # NOTE(review): the context is entered and never left; presumably
        # because `rules` may be a JS object the caller still uses --
        # confirm before changing this to a `with` block.
        ctx.enter()
        ctx.eval(alljs)
        return ctx.eval('rules')
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt and SystemExit
        # are no longer swallowed.
        logging.exception('clscontent function exception')
        return None