def analyse(js, tree):
    """
    Main function called from pdfrankenstein. Analyzes javascript in order
    to deobfuscate the code.

    Inside the JS context ``eval`` is replaced by a function that appends its
    argument to the global ``evalCode`` string instead of executing it.

    :param js: String of code to analyze
    :param tree: Tree xml object to use as reference for objects called from
        the code. When not None it is passed to ``create_objs``.
    :return: String of deobfuscated code; '' when PyV8 is unavailable, when
        ``eval_loop`` returns None, or when object creation / evaluation
        raises an ``Exception``.
    """
    if not PyV8:
        return ''

    with PyV8.JSIsolate():
        context = PyV8.JSContext()
        context.enter()
        try:
            context.eval('evalCode = \'\';')
            context.eval(
                'evalOverride = function (expression) { evalCode += expression; return;}'
            )
            context.eval('eval=evalOverride')
            try:
                if tree is not None:
                    create_objs(context, tree)
                ret = eval_loop(js, context)
            except Exception:
                # Best effort: any analysis failure yields an empty result.
                return ''
        finally:
            # Always balance enter(), including when the setup evals raise.
            context.leave()

        return '' if ret is None else ret
def get_javascript_tests_result(self, tests_lines=None, json_obj=None):
    """
    Summary:
        Run the assertions of a test case written in JavaScript.

    Args:
        tests_lines: test statements, separated by semicolons. Falls back to
            ``self.get_case_tests()`` when falsy.
        json_obj: object passed to the JS function as ``responseBody``.
            Falls back to ``self.get_json_response_obj()`` when falsy.

    :return: dict copied from the JS ``tests`` object, or None when no JSON
        object is available.
    """
    json_obj = json_obj or self.get_json_response_obj()
    if json_obj is None:
        return None

    tests_lines = tests_lines or self.get_case_tests()
    if isinstance(tests_lines, unicode):
        tests_lines = tests_lines.encode('utf8')

    # The user statements are spliced between the prologue and the
    # `return tests` line; the line breaks keep them separated from it.
    js_str = """
    (function(jsbody){
        var responseBody = jsbody;
        var tests = new Object();""" + tests_lines + """
        return tests
    })
    """

    test_dic = dict()
    with PyV8.JSLocker():
        js_context = PyV8.JSContext()
        js_context.enter()
        try:
            test_func = js_context.eval(js_str.decode('utf8'))
            test_jsobj = test_func(json_obj)
            for key in test_jsobj.keys():
                test_dic[key] = test_jsobj[key]
        finally:
            # Leave the context even when the JS test code throws.
            js_context.leave()
    return test_dic
def __getattr__(self, key):
    """
    Resolve an attribute that normal lookup did not find by evaluating
    its name in the JS context.

    Raises AttributeError when the name is already being resolved (it is
    in ``self._symbols``), is one of '__members__' / '__methods__', cannot
    be evaluated, or evaluates to an unsupported type.
    """
    # A name already in _symbols is currently being resolved further up
    # the stack; fail instead of recursing.
    if key in self._symbols:
        raise AttributeError(key)

    if key in ('__members__', '__methods__'):
        raise AttributeError(key)

    if key == 'constructor':
        return PyV8.JSClassConstructor(self.__class__)

    if key == 'prototype':
        return PyV8.JSClassPrototype(self.__class__)

    # Entries of the per-instance '__properties__' dict are indexable;
    # element 0 is called as the getter when it is callable.
    prop = self.__dict__.setdefault('__properties__', {}).get(key, None)

    if prop and isinstance(prop[0], collections.Callable):
        return prop[0]()

    if log.ThugOpts.Personality.isIE() and key.lower() in ('wscript', 'wsh', ):
        # Prevent _ActiveXObject loops
        super(Window, self).__setattr__("WScript", None)
        WScript = _ActiveXObject(self, "WScript.Shell")
        super(Window, self).__setattr__(key, WScript)
        super(Window, self).__setattr__("WScript", WScript)
        return WScript

    # Fetch the 'context' descriptor straight from the class dict and bind
    # it by hand (presumably to bypass attribute lookup on self - confirm).
    context = self.__class__.__dict__['context'].__get__(self, Window)

    try:
        self._symbols.add(key)
        symbol = context.eval(key)
    except:
        # Any failure while evaluating the name is reported as a missing
        # attribute.
        raise AttributeError(key)
    finally:
        self._symbols.discard(key)

    if isinstance(symbol, PyV8.JSFunction):
        _method = None

        if symbol in self._methods:
            _method = symbol.clone()

        if _method is None:
            _method = new.instancemethod(symbol, self, Window)
            # _method = symbol.__get__(self, Window)

        # Cache on the instance and in the JS locals so later lookups do
        # not reach __getattr__ again.
        setattr(self, key, _method)
        context.locals[key] = _method
        return _method

    if isinstance(symbol, (six.string_types, bool, numbers.Number, datetime.datetime, PyV8.JSObject)):
        setattr(self, key, symbol)
        context.locals[key] = symbol
        return symbol

    raise AttributeError(key)
def __to_python_format(js):
    """
    Evaluate ``js`` and convert its ``expectAll`` / ``resultAll`` globals.

    Each global must expose a ``data`` sequence of pairs whose first element
    has a ``strftime`` method; every pair becomes one
    ``'YYYY/MM/DD' -> second element`` dictionary entry.

    :param js: JavaScript source defining ``expectAll`` and ``resultAll``
    :return: ``{"expectAll": {...}, "resultAll": {...}}``
    """
    def as_date_map(series):
        # Convert the JS array to Python objects, then key by formatted date.
        return dict((d[0].strftime('%Y/%m/%d'), d[1])
                    for d in PyV8.convert(series["data"]))

    # The context manager leaves the context again; the previous code
    # entered it and never left.
    with PyV8.JSContext() as ctx:
        ctx.eval(js)
        expect_all = as_date_map(ctx.locals["expectAll"])
        result_all = as_date_map(ctx.locals["resultAll"])

    return {"expectAll": expect_all, "resultAll": result_all}
def get_eleven():
    '''
    Get "eleven" from oceanball

    The script downloaded from the oceanball URL has every ``eval`` replaced
    by ``JSON.stringify`` so that evaluating it yields the generated code as
    a quoted string instead of running it. That code is then run in a second
    context that stubs the browser globals it reads.

    Return
    ------
    eleven: string
        parameter to get detail info
    '''
    oceanball, cas = get_oceanball()
    ocean = requests.get(oceanball).content.decode('utf8')
    ocean = ocean.replace('eval', 'JSON.stringify')

    # First pass: the context is left again once the string is produced
    # (it used to be entered and never exited).
    with PyV8.JSContext() as ctxt:
        ocean = ctxt.eval(ocean)

    # SECURITY NOTE(review): Python eval() on text derived from a remote
    # response. It is used here only to unquote the JSON.stringify output;
    # a JSON decoder looks like the safe replacement - confirm the escape
    # handling matches before switching.
    ocean = eval(ocean)
    ocean = ocean.replace(cas, 'eleven=' + cas)

    with PyV8.JSContext() as ctxt:
        ctxt.eval(
            'var hotel_id = "433176"; var site = {}; site.getUserAgent = function(){}; var Image = function(){}; var window = {}; window.document = {body:{innerHTML:"1"}, documentElement:{attributes:{webdriver:"1"}}, createElement:function(x){return {innerHTML:"1"}}}; var document = window.document;window.navigator = {"appCodeName":"Mozilla", "appName":"Netscape", "language":"zh-CN", "platform":"Win"}; window.navigator.userAgent = site.getUserAgent(); var navigator = window.navigator; window.location = {}; window.location.href = "http://hotels.ctrip.com/hotel/"+hotel_id+".html"; var location = window.location;'
        )
        ctxt.eval(
            'var navigator = {userAgent:{indexOf: function(x){return "1"}}, geolocation:"1"}'
        )
        # The callback named by `cas` just invokes the function it is given.
        ctxt.eval('var %s = function(x){return x()}' % cas)
        ctxt.eval(ocean)
        return ctxt.locals.eleven
def solve_cf_challenge(self, resp, headers, **kwargs):
    """
    Answer the JavaScript challenge contained in ``resp`` and submit it.

    :param resp: response whose ``url`` and ``content`` hold the challenge
    :param headers: request headers; copied, the copy gets a ``Referer``
    :param kwargs: forwarded to ``requests.get`` (``params`` is dropped)
    :return: response of the GET to ``/cdn-cgi/l/chk_jschl``
    :raises IOError: when the challenge cannot be extracted from the page
    """
    request_headers = headers.copy()
    page_url = resp.url
    url_parts = urlparse(page_url)
    host = url_parts.netloc
    body = resp.content

    kwargs.pop("params", None)  # Don't pass on params

    try:
        # Extract the arithmetic operation
        challenge = re.search(r'name="jschl_vc" value="(\w+)"', body).group(1)
        expression = re.search(r"setTimeout.+?\r?\n([\s\S]+?a\.value =.+?)\r?\n", body).group(1)
        expression = re.sub(r"a\.value =(.+?) \+ .+?;", r"\1", expression)
        expression = re.sub(r"\s{3,}[a-z](?: = |\.).+", "", expression)
    except AttributeError:
        # A failed re.search returns None, so .group raises AttributeError:
        # the page no longer has the expected shape.
        raise IOError("Unable to parse Cloudflare anti-bots page. Try upgrading cfscrape, or "
                      "submit a bug report if you are running the latest version.")

    # Lock must be added explicitly, because PyV8 bypasses the GIL
    with PyV8.JSLocker():
        with PyV8.JSContext() as ctxt:
            # Safely evaluate the Javascript expression
            answer = str(int(ctxt.eval(expression)) + len(host))

    query = {"jschl_vc": challenge, "jschl_answer": answer}
    target = "%s://%s/cdn-cgi/l/chk_jschl" % (url_parts.scheme, host)
    request_headers["Referer"] = page_url

    return requests.get(target, params=query, headers=request_headers, **kwargs)
def __init__(self):
    """
    Create the JS context and run ``self.init()`` inside it.

    Nothing happens when ``HAS_PYV8`` is falsy. The context is built around
    a ``GlobalContext`` instance and stored as ``self.ctx``.
    """
    if HAS_PYV8:
        with PyV8.JSLocker():
            self.ctx = PyV8.JSContext(GlobalContext())
            self.ctx.enter()
            try:
                self.init()
            finally:
                # Balance enter() even when init() raises.
                self.ctx.leave()
def exec_(self, source):
    """
    Run ``self._source`` followed by ``source`` inside one anonymous JS
    function and return the converted result.

    :param source: JavaScript statements appended after ``self._source``
    :return: ``self.convert`` applied to the value produced by the script
    :raises RuntimeError: when compilation fails
    :raises ProgramError: when execution fails
    """
    import PyV8
    import contextlib

    program = '''\
    (function() {{
        {0};
        {1};
    }})()'''.format(encode_unicode_codepoints(self._source),
                    encode_unicode_codepoints(source))
    program = str(program)

    # contextlib.nested: backward compatibility
    with contextlib.nested(PyV8.JSContext(), PyV8.JSEngine()) as (ctxt, engine):
        js_errors = (PyV8.JSError, IndexError, ReferenceError, SyntaxError, TypeError)

        try:
            compiled = engine.compile(program)
        except js_errors as e:
            raise RuntimeError(e)

        try:
            outcome = compiled.run()
        except js_errors as e:
            raise ProgramError(e)

        return self.convert(outcome)
def walk(self, script):
    """
    Compile ``script`` and visit the result with this object.

    ``self.block_no`` is reset to 1 first. If compilation raises
    ``UnicodeDecodeError`` the script is decoded with the encoding guessed
    by ``chardet`` and compiled again.

    :param script: JavaScript source
    :raises UnicodeDecodeError: re-raised when no encoding can be guessed
    """
    self.block_no = 1

    try:
        PyV8.JSEngine().compile(script).visit(self)
    except UnicodeDecodeError:
        encoding = chardet.detect(script)['encoding']
        if encoding is None:
            # chardet reports None when it cannot guess; decoding with None
            # would raise an unrelated TypeError, so surface the real error.
            raise
        PyV8.JSEngine().compile(script.decode(encoding)).visit(self)
def fetch(self, url, postdata=None, headers=None):
    """
    Perform an HTTP request and return the result as a ``JSObject``.

    :param url: URL to request
    :param postdata: optional mapping; when truthy it is converted with
        ``PyV8.convert``, url-encoded and sent as the request body
    :param headers: optional mapping of request headers (default: none)
    :return: ``JSObject`` with ``content``, ``code`` and ``headers`` entries
    HTTP and URL errors are passed to ``self._context.throw`` as a string.
    """
    if headers is None:
        # Avoid a shared mutable default argument.
        headers = {}

    try:
        if postdata:
            postdata = urllib.urlencode(PyV8.convert(postdata))

        r = urllib2.Request(url=url, data=postdata, headers=PyV8.convert(headers))
        r.add_header('user-agent', 'DrEvalFetch/%s (%s)' % (doctoreval.__version__, doctoreval.__url__))
        f = urllib2.urlopen(r)
        try:
            return JSObject({"content": f.read(),
                             "code": f.getcode(),
                             "headers": JSObject(f.info().dict)})
        finally:
            # Release the connection once the body has been read.
            f.close()
    except (urllib2.HTTPError, urllib2.URLError) as e:
        self._context.throw(str(e))
def tes():
    """
    Evaluate a small JS function in the module-level ``ctxt`` while holding
    the JS lock, print what it returns, then ask the engine to collect
    garbage.
    """
    js_source = """(function(){function hello(){return "Hello world.";}return hello();})"""

    with PyV8.JSLocker():
        ctxt.enter()
        greet = ctxt.eval(js_source)
        print(greet())
        ctxt.leave()

    PyV8.JSUnlocker()
    PyV8.JSEngine.collect()
def energy(self, msg):
    """
    Encrypt ``msg`` with the ``encryptString`` function from ``deal.js``.

    ``msg``, ``self.e`` and ``self.m`` are passed as JS string arguments.
    The result is printed and returned.

    :param msg: text to encrypt
    :return: value returned by the JS ``encryptString`` call
    """
    import json

    with PyV8.JSLocker():
        with PyV8.JSContext() as ctex:
            with open("deal.js") as jsfile:
                ctex.eval(jsfile.read())

            # json.dumps yields a quoted, escaped string literal, so quotes
            # or backslashes in the values can no longer break the call.
            js_args = ",".join(json.dumps('%s' % (value,))
                               for value in (msg, self.e, self.m))
            encrypt_pwd = ctex.eval('encryptString(%s)' % js_args)

    print(encrypt_pwd)
    return encrypt_pwd
def run(self):
    """
    Decrypt ``self.message`` with ``self.password`` using the sjcl library
    read from ``self.sjcljs``; the result is stored in ``self.result``.
    """
    import json

    with PyV8.JSIsolate():
        with PyV8.JSContext() as context:
            with open(self.sjcljs) as fh:
                sjcl = fh.read()
            context.eval(sjcl)

            # json.dumps yields quoted, escaped string literals, so quotes
            # or backslashes in either value cannot break the expression.
            resp = context.eval('sjcl.decrypt(%s, %s)' % (
                json.dumps('%s' % (self.password,)),
                json.dumps('%s' % (self.message,))))
            self.result = resp
        del context
def run(self):
    """
    Encrypt ``self.message`` with ``self.password`` using the sjcl library
    read from ``self.sjcljs``; the JSON text returned by sjcl is decoded and
    stored in ``self.result``.
    """
    import json

    with PyV8.JSIsolate():
        with PyV8.JSContext() as context:
            with open(self.sjcljs) as fh:
                sjcl = fh.read()
            context.eval(sjcl)

            # json.dumps yields quoted, escaped string literals, so quotes
            # or backslashes in either value cannot break the expression.
            resp = context.eval('sjcl.encrypt(%s, %s);' % (
                json.dumps('%s' % (self.password,)),
                json.dumps('%s' % (self.message,))))
            self.result = JSONDecoder().decode(resp)
        del context
def dxPwdEncrypt(pwd):
    """
    Load ``dx_encrypt.js`` (located under ``basePath``) into a fresh JS
    context while holding the JS lock.

    NOTE(review): the call that would encrypt ``pwd`` is commented out, so
    this function currently returns None.
    """
    with PyV8.JSLocker():
        with PyV8.JSContext() as ctxt:
            with open(basePath + "dx_encrypt.js", 'r') as script_file:
                source = script_file.read()
            source = source + '\n'
            ctxt.eval(source)
            # encryptPwd = ctxt.eval("valAesEncryptSet('%s')" % pwd)
            # return encryptPwd
def parse_price(resp, rule):
    """
    Evaluate the response body as JavaScript and return it as Python data.

    The body and the converted value are printed between two separator
    lines. ``rule`` is accepted but not used.

    :param resp: response whose ``text`` is the JS expression, e.g.
        [{'p': '769.00', 'm': '859.00', 'id': 'J_954086'}]
    :return: the evaluated value converted with ``PyV8.convert``
    """
    print('++++++++++++++++++++++++++++++++++++++++++++++++++++++++')
    print(resp.text)
    jscode = resp.text

    js_global = Global()
    info = None
    with PyV8.JSContext(js_global) as ctx:
        info = PyV8.convert(ctx.eval(jscode))

    print(info)
    print('++++++++++++++++++++++++++++++++++++++++++++++++++++++++')
    return info
def __getattr__(self, name):
    """
    Resolve an attribute that normal lookup did not find by evaluating
    its name in the JS context.

    Raises AttributeError when the name is already being resolved (it is
    in ``self._symbols``), is one of '__members__' / '__methods__', cannot
    be evaluated, or evaluates to an unsupported type.
    """
    # A name already in _symbols is currently being resolved further up
    # the stack; fail instead of recursing.
    if name in self._symbols:
        raise AttributeError(name)

    if name in ('__members__', '__methods__'):
        raise AttributeError(name)

    if name == 'constructor':
        return PyV8.JSClassConstructor(self.__class__)

    if name == 'prototype':
        return PyV8.JSClassPrototype(self.__class__)

    # Entries of the per-instance '__properties__' dict are indexable;
    # element 0 is called as the getter when it is callable.
    prop = self.__dict__.setdefault('__properties__', {}).get(name, None)

    if prop and isinstance(prop[0], collections.Callable):
        return prop[0]()

    # Fetch the 'context' descriptor straight from the class dict and bind
    # it by hand (presumably to bypass attribute lookup on self - confirm).
    context = self.__class__.__dict__['context'].__get__(self, Window)

    try:
        self._symbols.add(name)
        symbol = context.eval(name)
    except:
        # Any failure while evaluating the name is reported as a missing
        # attribute.
        raise AttributeError(name)
    finally:
        self._symbols.discard(name)

    if isinstance(symbol, PyV8.JSFunction):
        # The JS function itself is cached, on the instance and in the JS
        # locals. An earlier, disabled variant wrapped it first
        # (symbol.clone() for entries of self._methods, otherwise
        # new.instancemethod(symbol, self, Window)).
        setattr(self, name, symbol)
        context.locals[name] = symbol
        return symbol

    if isinstance(symbol, (thug_string, bool, numbers.Number, datetime.datetime, PyV8.JSObject)):
        setattr(self, name, symbol)
        context.locals[name] = symbol
        return symbol

    raise AttributeError(name)
def walk(self, script):
    """
    Compile ``script`` and visit the result with this object.

    ``self.block_no`` is reset to 1 first. On ``UnicodeDecodeError`` the
    script is decoded with the encoding reported by ``log.Encoding.detect``
    and compiled again; nothing more is done when no encoding is reported.
    Any other error raised by the first attempt is ignored.
    """
    self.block_no = 1

    try:
        tree = PyV8.JSEngine().compile(script)
        tree.visit(self)
    except UnicodeDecodeError:
        detected = log.Encoding.detect(script, safe=True)
        if detected:
            engine = PyV8.JSEngine()
            engine.compile(script.decode(detected)).visit(self)
    except:
        pass
def __init__(self, pure=False, context=None):
    """Setup the context

    :param pure: when true, no default globals are installed and ``COMPAT``
        is not evaluated
    :param context: optional globals object handed to ``PyV8.JSContext``;
        when falsy and ``pure`` is false, a dict with ``XMLHttpRequest`` and
        a ``Console`` instance is used
    The created context is entered and stays entered.
    """
    if not (pure or context):
        js_console = Console()
        context = {'XMLHttpRequest': XMLHttpRequest, 'console': js_console}

    self.context = PyV8.JSContext(context) if context else PyV8.JSContext()
    self.context.enter()

    if not pure:
        self.eval(COMPAT)
def executeJS(self, js_func_string, arg):
    """
    Evaluate a JavaScript function expression and call it with ``arg``.

    A new context is created for every call and used while the JS lock is
    held.

    :param js_func_string: source of a JS function; it is wrapped in
        parentheses so that it evaluates to the function object
    :param arg: single argument passed to the function
    :return: whatever the JS function returns
    """
    ctxt = PyV8.JSContext()
    with PyV8.JSLocker():
        ctxt.enter()
        try:
            js_func = ctxt.eval("({js})".format(js=js_func_string))
            return js_func(arg)
        finally:
            # Balance enter() even when the JS code throws.
            ctxt.leave()
def walk(self, script):
    """
    Compile ``script`` and visit the result with this object.

    ``self.block_no`` is reset to 1 first. On ``UnicodeDecodeError`` the
    script is decoded with the ``'encoding'`` entry reported by
    ``log.Encoding.detect`` and compiled again; nothing more is done when
    detection returns None. Any other error raised by the first attempt is
    ignored.
    """
    self.block_no = 1

    try:
        tree = PyV8.JSEngine().compile(script)
        tree.visit(self)
    except UnicodeDecodeError:
        detected = log.Encoding.detect(script, safe=True)
        if detected is not None:
            engine = PyV8.JSEngine()
            engine.compile(script.decode(detected['encoding'])).visit(self)
    except:  # pylint:disable=bare-except
        pass
def __parse(self, tvid, vid, uid, bid):
    """
    Build a signed ``/jp/dash`` request, fetch it and parse the JSON reply.

    The signing helpers (``authkey``, ``callback``, ``vf``) come from
    ``ArrayBuffer.js`` and ``pcweb.js``, which are read from the current
    directory. The request parameters are merged into the module-level
    ``global_params`` before being url-encoded.

    :return: decoded JSON payload, or None when the reply does not contain
        the expected ``try{callback(...);}catch`` wrapper
    """
    js_context = ''
    with open("ArrayBuffer.js", 'r') as f:
        js_context += f.read()
    with open("pcweb.js", 'r') as f:
        js_context += f.read()

    time_str = str(int(time.time() * 1000))

    ctx = PyV8.JSContext()
    with PyV8.JSLocker():
        ctx.enter()
        try:
            ctx.eval(js_context)
            authkey = ctx.locals.authkey(
                ctx.locals.authkey('') + time_str + tvid)
            callback = ctx.locals.callback()
            params = {
                'tvid': tvid,
                'vid': vid,
                'bid': str(bid),
                'tm': time_str,
                'k_uid': uid,
                'callback': callback,
                'authKey': authkey,
            }
            global_params.update(params)
            params_encode = urllib.urlencode(global_params)
            path_get = '/jp/dash?' + params_encode
            # The vf signature is computed over the path and query string.
            vf = ctx.locals.vf(path_get)
            path_get += '&vf=%s' % vf
        finally:
            # Balance enter() even when a signing helper throws.
            ctx.leave()

    req = urllib2.Request('http://cache.video.iqiyi.com/' + path_get.lstrip('/'))
    res = self.opener.open(req)
    try:
        raw = res.read()
        text = raw_decompress(raw, res.info())
    finally:
        res.close()

    # A missing wrapper used to raise AttributeError on `.group`, which made
    # the "no result" branch unreachable; it now yields None.
    match = re.search(r'try{\w{0,}\((.+})(\s)?\);}catch', text)
    if match is None:
        return None
    return json.loads(match.group(1))
def jseval(code):
    """
    Evaluate JavaScript ``code`` and return the result.

    When the module-level ``_jw`` is falsy a temporary context is used.
    Otherwise the code runs in ``_jw.context`` while holding a ``JSLocker``.

    :param code: JavaScript source
    :return: value produced by the evaluation
    """
    if not _jw:
        ctxt = PyV8.JSContext()
        ctxt.enter()
        try:
            return ctxt.eval(code)
        finally:
            ctxt.leave()

    k = PyV8.JSLocker()
    k.enter()
    try:
        _jw.context.enter()
        try:
            return _jw.context.eval(code)
        finally:
            _jw.context.leave()
    finally:
        # Release the lock even when the evaluation throws.
        k.leave()
def imitate_cookie(self, r_1):
    """
    Compute the cookie that the page's JavaScript would set and store it in
    ``self.session``.

    The first ``<script>`` of ``r_1`` is run with its ``eval(y`` call turned
    into ``return (y`` to obtain the generated code. Browser-only parts of
    that code are removed, the ``document.cookie`` assignment is turned into
    ``return dc;`` and the code is run again to obtain a ``name=value``
    string.

    :param r_1: response whose ``text`` contains the challenge script
    """
    print('**********************************************imitate_cookie')

    soup = BeautifulSoup(r_1.text, 'lxml')
    script1 = soup.select('script')[0].text
    script2 = ("(function(){" + script1.replace('eval(y', 'return (y') + "})").encode('utf-8')

    # The context manager leaves the context again; it used to be entered
    # and never left.
    with PyV8.JSContext() as ctxt:
        func = ctxt.eval(script2)
        script3 = func()

        script4 = script3.replace("while(window._phantom||window.__phantomas){};", "") \
            .replace("if((function(){try{return !!window.addEventListener;}catch(e){return false;}})())"
                     "{document.addEventListener('DOMContentLoaded',l,false);}else{document.attachEvent('onreadystatechange',l);}", '') \
            .replace(r"var h=document.createElement('div');h.innerHTML='<a href=\'/\'>x</a>';h=h.firstChild.href;",
                     "var h='http://www.gsxt.gov.cn/';")
        script5 = re.sub("document.cookie=.+\\);", 'return dc;',
                         re.sub("setTimeout[^;]+;", '', script4)) + "return l();"
        script6 = "(function(){" + script5 + "})"

        func2 = ctxt.eval(script6)
        cookie = func2()

    # Split on the first '=' only: the value itself may contain '='.
    self.session.cookies.set(*cookie.split('=', 1))
def get_jsl_clearance(req, post_cookie):
    """
    Run the challenge script from ``req`` and add the resulting cookie to
    ``post_cookie``.

    The script is run twice in a context whose ``document.write`` stores its
    argument in the module-level global ``result``: first with ``;eval``
    redirected to ``;document.write`` to capture the generated code, then
    with that code's ``document.cookie`` assignment redirected to
    ``document.write(dc)`` to capture the cookie string.

    :param req: response carrying the script (its encoding is set to utf-8)
    :param post_cookie: mapping updated in place with the cookie
    :return: ``post_cookie``
    """
    req.encoding = 'utf-8'
    script = req.text.strip().replace("<script>", "")
    script = script.replace("</script>", "")
    script = script.replace(";eval", ";document.write")
    script = script.replace("\x00", "")

    class v8Doc(PyV8.JSClass):
        def write(self, s):
            # Hand the written text back through a module-level global.
            global result
            result = s

    class Global(PyV8.JSClass):
        def __init__(self):
            self.document = v8Doc()

    glob = Global()
    # The context manager leaves the context again; it used to be entered
    # and never left.
    with PyV8.JSContext(glob) as ctxt:
        ctxt.eval(script)

        script = result.replace("while(window._phantom||window.__phantomas){};", "")
        script = script.replace(
            "setTimeout('location.href=location.href.replace(/[\\?|&]captcha-challenge/,\\'\\')',1500);",
            "")
        script = script.replace(
            "if((function(){try{return !!window.addEventListener;}catch(e){return false;}})()){document.addEventListener('DOMContentLoaded',l,false);}else{document.attachEvent('onreadystatechange',l);}",
            "l();")
        r = re.compile(r'document.cookie.*?\)\;')
        script = re.sub(r, 'document.write(dc)', script)

        ctxt.eval(script)

    # Split on the first '=' only: the value itself may contain '='.
    name, value = result.split('=', 1)
    post_cookie[name] = value
    return post_cookie
def wapple(self, id, url):
    """
    Run the wappalyzer scripts against ``url`` and store what they detect.

    The page is fetched with ``requests``; ``js/wappalyzer.js`` and
    ``js/driver.js`` under ``self.file_dir`` are evaluated with
    ``self.apps`` / ``self.categories`` and the page data. One row per
    detected app is written through ``self.cur``, then ``self.con`` is
    committed.

    :param id: identifier stored with every row and shown in the summary
    :param url: page to analyse
    """
    with open(os.path.join(self.file_dir, 'js/wappalyzer.js')) as f1:
        wappalyzer_js = f1.read()
    with open(os.path.join(self.file_dir, 'js/driver.js')) as f2:
        driver_js = f2.read()

    host = urlparse(url).hostname
    response = requests.get(url)
    html = response.text
    headers = dict(response.headers)
    data = {'host': host, 'url': url, 'html': html, 'headers': headers}

    apps = json.dumps(self.apps)
    categories = json.dumps(self.categories)

    # The context manager leaves the context again; it used to be entered
    # and never left.
    with PyV8.JSContext() as ctxt:
        ctxt.eval(wappalyzer_js)
        ctxt.eval(driver_js)
        results = ctxt.eval(
            "w.apps = %s; w.categories = %s; w.driver.data = %s; w.driver.init();"
            % (apps, categories, json.dumps(data)))

    answers = json.loads(results)
    print("{0}: {1} - {2}".format(id, url, len(answers)))

    for app, thing in answers.items():
        # Join every category; the old loop reassigned instead of appending
        # and therefore kept only the last one.
        app_categories = ",".join(thing["categories"])
        version = thing["version"]
        # NOTE(review): the statement is built with str.format from
        # detected values; switch to driver-side parameter binding once the
        # shape of `feature_insert` is confirmed.
        self.cur.execute(
            feature_insert.format(id, app, app_categories, version)
        )
    self.con.commit()
def _get_js_obj(self, ctx, obj):
    """
    Convert Python object to JS object and return it

    Lists and tuples become a ``PyV8.JSArray``, dicts become a JS object
    created in ``ctx``, and both are converted recursively. Any other value
    is returned unchanged.

    :param PyV8.JSContext ctx: current JS context
    :param mixed obj: object for convert
    """
    if isinstance(obj, (list, tuple)):
        return PyV8.JSArray([self._get_js_obj(ctx, item) for item in obj])

    if not isinstance(obj, dict):
        return obj

    target = ctx.eval('new Object();')
    for key in obj.keys():
        try:
            target[key] = self._get_js_obj(ctx, obj[key])
        except Exception as e:
            # Only the argument-type error is retried, with the key reduced
            # to an ASCII byte string; everything else propagates.
            if not str(e).startswith('Python argument types in'):
                raise
            import unicodedata
            ascii_key = unicodedata.normalize('NFKD', key).encode(
                'ascii', 'ignore')
            target[ascii_key] = self._get_js_obj(ctx, obj[key])
    return target
def test_kuaidaili():
    """Work around the cookie encryption used by kuaidaili.

    Fetches the proxy list page, reads ``hm.js`` from ``BASE_DIR`` and
    evaluates it in a JS context.

    @refer: https://zhuanlan.zhihu.com/p/25957793

    :return: False when the page or the script is empty, otherwise None
    """
    kuai_url = "http://www.kuaidaili.com/proxylist/1/"

    # The first request returns the dynamically encrypted JS
    kuai_first_html = get_kuaidaili_html(kuai_url)
    if not kuai_first_html:
        return False

    js_path = BASE_DIR + os.sep + 'hm.js'
    with open(js_path, 'r') as fd:
        js_data = fd.read()

    if not js_data:
        print('读取的hm.js文本为空')
        return False

    # Run the JS code to obtain the cookie information. The context is only
    # entered for the evaluation; the leftover debugger breakpoint and the
    # unused HTML wrapper string were removed.
    with PyV8.JSContext() as ctxt:
        ctxt.eval(js_data)

    print('Success execute javascript code')
def analyze_from_data(self, url, html, headers):
    """
    Run the wappalyzer scripts over an already downloaded page.

    :param url: address of the page; its hostname is passed as ``host``
    :param html: page body
    :param headers: response headers (must be JSON serialisable)
    :return: decoded JSON result of ``w.driver.init()``
    """
    logger.debug('Analyzing: %s', url)

    with open(settings.FILENAME_WAPPALIZER_JS) as f:
        wappalyzer_js = f.read()
    with open(settings.FILENAME_DRIVER_JS) as f:
        driver_js = f.read()

    apps = json.dumps(self.apps)
    categories = json.dumps(self.categories)
    data = {
        'host': urlparse(url).hostname,
        'url': url,
        'html': html,
        'headers': headers
    }

    # The context manager leaves the context again; previously every call
    # entered a new context and never left it.
    with PyV8.JSContext() as ctxt:
        ctxt.eval(wappalyzer_js)
        ctxt.eval(driver_js)
        result = ctxt.eval(
            "w.apps={apps}; w.categories={categories}; w.driver.data={data}; w.driver.init();".format(
                apps=apps, categories=categories, data=json.dumps(data)
            )
        )

    return json.loads(result)
def download(path, chapter='', pagenum=0):
    """
    Download one page image into ``path`` as ``<pagenum, 3 digits>.jpg``.

    The image address comes from evaluating the script returned by the
    ``chapterfun.ashx`` endpoint.

    :param path: target directory (created when missing)
    :param chapter: chapter identifier given to ``url_gen`` / ``extract_id``
    :param pagenum: page number
    :return: True when the file already exists or was written; False when
        the helper request does not return 200 or the image request
        returns 404
    """
    file_path = os.path.join(path, '%03d.jpg' % pagenum)
    if os.path.isfile(file_path):
        return True

    url = url_gen(chapter, pagenum)
    myheaders = copy.copy(headers)
    myheaders['Referer'] = url
    # "&gtk=6" had been HTML-unescaped into ">k=6", which corrupted the
    # query string.
    fun = url + \
        'chapterfun.ashx?cid=%d&page=%d&key=&language=1&gtk=6' % (
            extract_id(chapter), pagenum)

    r1 = requests.get(fun, headers=myheaders)
    if r1.status_code != 200:
        return False

    # `with` already enters the context; the extra enter() call was never
    # balanced by a leave().
    with PyV8.JSContext() as ctxt:
        func = ctxt.eval(r1.text[4:])
        func2 = ctxt.eval(func)
        html = str(func2).split(',')[0]

    r = requests.get(html, headers=myheaders)
    if r.status_code == 404:
        print('Blocked')
        return False

    if not os.path.exists(path):
        os.mkdir(path)
    with open(file_path, 'wb') as f:
        f.write(r.content)
    return True
def context(self):
    """
    Return the JS context bound to this object, creating it on first use.

    On creation ``thug.js`` is evaluated, then ``storage.js`` for Internet
    Explorer personalities with a major version below 8, then every ``.js``
    file of the hooks folder in sorted order, and finally the engine is
    asked to collect garbage.
    """
    def read_script(path):
        # Close the file as soon as it is read instead of leaking the handle.
        with open(path, 'r') as script_file:
            return script_file.read()

    # The instance dict is checked directly; the hasattr() variant was
    # disabled (presumably because it would go through __getattr__ - confirm).
    if '_context' not in self.__dict__:
        self._context = PyV8.JSContext(self)

        with self._context as ctxt:
            thug_js = os.path.join(thug.__configuration_path__, 'scripts', "thug.js")
            ctxt.eval(read_script(thug_js))

            if log.ThugOpts.Personality.isIE() and log.ThugOpts.Personality.browserMajorVersion < 8:
                storage_js = os.path.join(thug.__configuration_path__, 'scripts', "storage.js")
                ctxt.eval(read_script(storage_js))

            hooks_folder = os.path.join(thug.__configuration_path__, 'hooks')
            for hook in sorted([h for h in os.listdir(hooks_folder) if h.endswith('.js')]):
                ctxt.eval(read_script(os.path.join(hooks_folder, hook)))

            PyV8.JSEngine.collect()

    return self._context
def log(self, *args):
    """
    Print the arguments separated by single spaces.

    Every argument is first converted with ``PyV8.convert`` and then turned
    into text with ``str``.
    """
    converted = [PyV8.convert(value) for value in args]
    print(" ".join(map(str, converted)))
def get_tk(self):
    """
    Compute the ``tk`` token for ``self.content``.

    The text from ``self.get_TKK()`` is unescaped and evaluated as
    JavaScript; its value and ``self.content`` are passed to the JS ``tk``
    function defined below. The token is printed and returned.
    """
    with PyV8.JSContext() as ctxt:
        # Restore the meaning of the '\x' escape sequences
        tkk_source = codecs.getdecoder("unicode_escape")(self.get_TKK())[0]
        tkk_value = ctxt.eval(tkk_source)
        ctxt.eval("""
            var b = function (a, b) {
                for (var d = 0; d < b.length - 2; d += 3) {
                    var c = b.charAt(d + 2),
                        c = "a" <= c ? c.charCodeAt(0) - 87 : Number(c),
                        c = "+" == b.charAt(d + 1) ? a >>> c : a << c;
                    a = "+" == b.charAt(d) ? a + c & 4294967295 : a ^ c
                }
                return a
            }

            var tk = function (a,TKK) {
                for (var e = TKK.split("."), h = Number(e[0]) || 0, g = [], d = 0, f = 0; f < a.length; f++) {
                    var c = a.charCodeAt(f);
                    128 > c ? g[d++] = c : (2048 > c ? g[d++] = c >> 6 | 192 : (55296 == (c & 64512) && f + 1 < a.length && 56320 == (a.charCodeAt(f + 1) & 64512) ? (c = 65536 + ((c & 1023) << 10) + (a.charCodeAt(++f) & 1023), g[d++] = c >> 18 | 240, g[d++] = c >> 12 & 63 | 128) : g[d++] = c >> 12 | 224, g[d++] = c >> 6 & 63 | 128), g[d++] = c & 63 | 128)
                }
                a = h;
                for (d = 0; d < g.length; d++) a += g[d], a = b(a, "+-a^+6");
                a = b(a, "+-3^+b+-f");
                a ^= Number(e[1]) || 0;
                0 > a && (a = (a & 2147483647) + 2147483648);
                a %= 1E6;
                return a.toString() + "." + (a ^ h)
            }
        """)
        js_tk = ctxt.locals.tk
        tk = js_tk(self.content, tkk_value)
        print(tk)
        return tk
def eval_coffee_footprint(coffee):
    """
    Compile a coffeescript footprint and evaluate it.

    The ``#format`` meta field picks the ``ground-<format>.coffee`` resource
    that is compiled and run ahead of the footprint code.

    :param coffee: coffeescript source of the footprint
    :return: converted result of ``footprint()`` with the meta dict appended
    :raises Exception: when ``#format`` is missing or not supported
    """
    global js_make_js_ctx
    global js_make_js_from_coffee
    global js_ctx_cleanup_count

    meta = eval_coffee_meta(coffee)
    if 'format' not in meta:
        raise Exception("Missing mandatory #format meta field")
    footprint_format = meta['format']
    if footprint_format not in supported_formats:
        raise Exception("Unsupported file format. Supported formats: %s" % (supported_formats))

    # only compile the compiler once
    js_ctx_cleanup_count += 1

    # HACK: occasionally cleanup the context to avoid compiler slowdown
    # will need a better approach in the future
    if js_ctx_cleanup_count == 10:
        js_make_js_ctx = None
        js_make_js_from_coffee = None
        js_ctx_cleanup_count = 0

    if js_make_js_ctx is None:
        prepare_coffee_compiler()

    try:
        js_make_js_ctx.enter()
        ground_source = pkg_resources.resource_string(
            grind.__name__, "ground-%s.coffee" % (footprint_format))
        ground_js = js_make_js_from_coffee(ground_source)
        footprint_js = js_make_js_from_coffee(coffee + "\nreturn footprint()\n")
        with PyV8.JSContext() as ctxt:
            wrapped = "(function() {\n" + ground_js + footprint_js + "\n}).call(this);\n"
            js_res = ctxt.eval(wrapped)
            result = PyV8.convert(js_res)
            result.append(meta)
            return result
    finally:
        js_make_js_ctx.leave()
def setUp(self):
    """
    Prepare the JS and Python parsers for a test.

    A context around ``Global()`` is created and entered, and
    ``self.jsparser`` is evaluated in it. ``self.jsparser`` is then replaced
    by a callable that runs ``parseStructure(s, t)`` in that context and
    returns the converted result.
    """
    self.assertIsNotNone(self.jsparser_source)

    self.ctxt = PyV8.JSContext(Global())
    self.ctxt.enter()
    self.ctxt.eval(self.jsparser)

    testcode = "var s='%s'; var t='%s';parseStructure(s, t);"

    def run_js_parser(x, y):
        # Newlines are rewritten so the text stays a valid JS string
        # concatenation.
        source = testcode % (x.replace('\n', "' + \n '"), y)
        return convert_to_unicode(PyV8.convert(self.ctxt.eval(source)))

    self.jsparser = run_js_parser
    self.parser = {"pyparser": pyparser, "jsparser": self.jsparser}
def wrapper(*args):
    """
    Call ``f`` with the given positional arguments.

    When the last argument is a ``JSObject`` it is converted with
    ``v8.convert`` and passed as keyword arguments instead.
    """
    if not args:
        return f()

    trailing = args[-1]
    if isinstance(trailing, JSObject):
        keyword_args = v8.convert(trailing)
        args = args[:-1]
    else:
        keyword_args = {}

    return f(*args, **keyword_args)
def jd_save2db(data, opt):
    """
    Look up the price for ``data['skuid']`` and save the item.

    The price endpoint's body is evaluated as JavaScript, e.g.
    [{'p': '769.00', 'm': '859.00', 'id': 'J_954086'}]. ``data`` receives
    ``price``, ``old_price`` and a utf8-decoded ``title`` before it is
    handed to ``save2db``.

    :param data: item dict with at least ``skuid`` and ``name``
    :param opt: options object; ``opt.url`` is sent as the referer
    """
    price_url = 'http://p.3.cn/prices/mgets?skuIds=J_%s&type=1' % data['skuid']
    resp = spider.fetch(price_url, {'referer': opt.url})
    jscode = resp.text

    js_global = Global()
    info = None
    with PyV8.JSContext(js_global) as ctx:
        info = PyV8.convert(ctx.eval(jscode))

    print('++++++++++++++++++++++++++++++++++++++++++++++++++++++++')

    first = info[0]
    data['price'] = first['p']
    data['old_price'] = first['m']
    data['title'] = data['name'].decode('utf8')
    save2db(data, opt)
def jd_parse_item_info(nodes, name, values, item):
    """
    Merge the ``pageConfig.product`` data of a script node into ``values``.

    The text of the first node is evaluated as JavaScript while holding
    ``pyv8_jslocker``. Nothing happens when ``nodes`` is empty. ``name`` and
    ``item`` are accepted but not used.

    :param nodes: sequence of nodes; only the first one is read
    :param values: mapping updated in place with the product entries
    """
    if not nodes:
        return

    jscode = nodes[0].text_content()

    info = None
    js_global = Global()
    with pyv8_jslocker:
        with PyV8.JSContext(js_global) as ctx:
            ctx.eval(jscode)
            info = PyV8.convert(ctx.locals.pageConfig)

    values.update(info['product'])

    if not values.get('title'):
        pass
def script(self, obj=None):
    """
    Run ``self._script`` as the body of a JS function and return its result.

    The keys of ``obj`` become the function's parameter names and the
    values, serialised with ``simplejson.dumps``, become the call arguments.

    :param obj: mapping of parameter names to values (default: no
        parameters)
    :return: value of the ``_runscript(...)`` call
    """
    # None replaces the former shared mutable default.
    obj = PyV8.convert({} if obj is None else obj)

    self._context.eval("function _runscript(%s) { %s }" %
                       (', '.join(obj.keys()), self._script))
    return self._context.eval("_runscript(%s);" %
                              ', '.join([simplejson.dumps(v) for v in obj.values()]))