def __init__(self, program, mut_mode="random", seg_loc_change=True):
    '''
    Set up the mutation bookkeeping for a target program and parse it.

    program: source text of the target program (must be a str)
    mut_mode: mutation mode; it is applied when a mutant is generated
        (gen_mutant)
    seg_loc_change: if true, the program is first parsed as-is and the text
        returned by self.gen_code() is parsed in its place. Mutated code
        does not preserve blank lines and comments, which makes line
        numbers disagree between the original and the mutated code; this
        flag compensates for that.

    instance objects:
        - node_history: list of (mutation type, loc, node class,
          *rollback_info). node class is used for rolling back.
          Do not directly extract information
    '''
    assert isinstance(program, str)

    self.code = program
    self.mut_mode = mut_mode
    self.mut_cnt = 0
    self.recent_mut = None
    self.recycling = 0  # number of times nodes have been recycled
    self.node_list = []
    self.node_list_used = []

    if seg_loc_change:
        # self.parsed holds the plain parse before gen_code() is called.
        self.parsed = esprima.parseScript(program)
        tracked_source = self.gen_code()
    else:
        tracked_source = program

    # The delegate is invoked by esprima while this parse is running.
    self.parsed = esprima.parseScript(
        tracked_source, delegate=self.mutant_cand_stack, loc=True)
    self.node_history = []
def refresh_fitness(self):
    """Run the external tester on the current code and record its fitness.

    Returns immediately when this individual was already tested. When the
    modified line is present in ``test_result_cache`` the individual is
    marked as tested and the cached entry is returned (this is the only
    path that returns a value; the cache stores ``str(test_stat)``).

    Otherwise the code is syntax-checked with esprima, written to
    ``../output/<project>/<file>``, and ``Tester.py`` is run through the
    shell. If the tester produced its result file, the first line is read
    as JSON and ``fitness`` is set to
    ``W_POS * passes + W_NEG * failures``; if not, or if the code does not
    parse, ``test_execution_failed`` is called.
    """
    Logger.start_test(self)
    if self.already_tested:
        return
    if self.get_modified_line() in self.test_result_cache:
        self.already_tested = True
        return self.test_result_cache[self.get_modified_line()]
    try:
        # Syntax check only: the parse result is discarded, an esprima.Error
        # sends control to the handler at the bottom.
        esprima.parseScript("\n".join(self.get_code()))
        with open("../output/" + self.args["project"].lower() + "/" + self.args["file"], 'w', encoding='utf-8') as file:
            file.write("\n".join(self.get_code()))
        if not os.path.exists(Parameters.ROOT_DIR + '/temp'):
            os.makedirs(Parameters.ROOT_DIR + '/temp')
        # Random 20-letter name shared by the result file and the log file.
        filename = ''.join(
            random.choice(string.ascii_lowercase) for _ in range(20))
        # NOTE(review): the result path uses Parameters.TEMP_DIR while the
        # directory created above and the log path use a literal '/temp' --
        # confirm these refer to the same directory.
        path = str(Parameters.ROOT_DIR
                   ) + "/" + Parameters.TEMP_DIR + filename + '.test'
        command = 'python3 Tester.py ' + self.parse_arguments(
            path
        ) + " > " + Parameters.ROOT_DIR + '/temp/' + filename + '.log'
        if Parameters.VERBOSE < 3:
            # Also send the tester's stderr to the log file.
            command += " 2>&1"
        process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
        process.wait()
        if os.path.exists(path):
            with open(path, 'r', encoding='utf-8') as file:
                # Only the first line of the result file is used.
                test_stat = json.loads(file.readline().strip())
            self.fitness = float(
                Parameters.W_POS * test_stat["passes"] +
                Parameters.W_NEG * test_stat["failures"])
            self.already_tested = True
            self.test_stat = test_stat
            self.test_result_cache[self.get_modified_line()] = str(
                test_stat)
            Logger.end_test(test_stat)
            os.remove(path)
        else:
            # The tester did not write a result file.
            self.test_execution_failed(filename)
    except esprima.Error:
        self.test_execution_failed("")
def get_connected_devices(self):
    """Return the devices the gateway reports as active.

    Fetches ``/cgi/cgi_owl.js`` from the gateway and extracts the known
    device list from the JavaScript it returns.

    Returns:
        dict mapping MAC address to hostname for every known device whose
        ``activity`` field equals 1. Empty when the gateway answers with a
        non-OK HTTP status or when no device data is found.
    """
    res = self.session.get(self.host + '/cgi/cgi_owl.js',
                           timeout=TIMEOUT, verify=self.verify)

    connected_devices = {}

    if res.status_code != HTTPStatus.OK:
        _LOGGER.warning(
            'Failed to get connected devices from gateway; '
            'got HTTP status code %s', res.status_code)
        # An error response carries no device list; do not try to parse it.
        return connected_devices

    # Unfortunately, the data is provided to the frontend not as a JSON
    # blob, but as some JavaScript to execute. The below code uses a
    # JavaScript parser and AST visitor to extract the known device data
    # from the script.
    #
    # Example response:
    #
    # addROD('known_device_list', { 'known_devices': [ { 'mac': 'xx:xx:xx:xx:xx:xx', 'hostname': 'name' } ] });
    def visitor(node, metadata):
        if node.type != 'CallExpression':
            return
        if node.callee.type != 'Identifier' or node.callee.name != 'addROD':
            return
        # Both the list name and the payload object are required below.
        if len(node.arguments) < 2:
            return
        if node.arguments[0].value != 'known_device_list':
            return

        known_devices_node = None
        for prop in node.arguments[1].properties:
            if prop.key.value == 'known_devices':
                known_devices_node = prop.value

        if known_devices_node is None:
            _LOGGER.debug(
                'Failed to find known_devices object in response data')
            return

        for device in known_devices_node.elements:
            data = {
                prop.key.value: prop.value.value
                for prop in device.properties
            }
            if 'activity' not in data or 'mac' not in data or 'hostname' not in data:
                continue
            if data['activity'] == 1:
                connected_devices[data['mac']] = data['hostname']

    # Only lines that mention the device list are handed to the parser.
    for line in res.text.split("\n"):
        if "known_device_list" in line:
            esprima.parseScript(line, {}, visitor)

    return connected_devices
def analyze (code: str, patternStore: PatternStore):
    """Parse *code* and run every pattern in PATTERNS on each visited node.

    Each pattern is called as ``pattern(node, patternStore)``. The code is
    first parsed as plain JavaScript; if that raises, it is parsed again
    with JSX support enabled.
    """
    def checkPatterns (node, patterns):
        # Use the argument rather than reaching for the module-level list.
        for pattern in patterns:
            pattern(node, patternStore)

    def traverse (node, meta):
        checkPatterns(node, PATTERNS)

    try:
        esprima.parseScript(code, loc=True, delegate=traverse, tolerant=True)
    except Exception:
        # `except Exception` (not a bare except) so KeyboardInterrupt and
        # SystemExit are not swallowed by the retry.
        # NOTE(review): the retry invokes the delegate again, so nodes
        # visited before the failure may reach the patterns a second time.
        esprima.parseScript(code, loc=True, delegate=traverse, tolerant=True, jsx=True)
def extract_strings(javascript: str) -> List[str]:
    """Return the value of every string literal found in *javascript*.

    When esprima cannot be imported the work is delegated to
    extract_strings_slimit().
    """
    print("Extracting strings...", file=sys.stderr)

    try:
        import esprima
    except ImportError:
        return extract_strings_slimit(javascript)

    collected = []

    def collect_string_literal(node, *args):
        # Called by esprima for each node; keep only string-valued literals.
        if node.type != "Literal":
            return
        if isinstance(node.value, str):
            collected.append(node.value)

    esprima.parseScript(javascript, delegate=collect_string_literal)
    return collected
def src_ast_dict(path='test/tests12/Book666.js') -> dict:
    """Parse the JavaScript file at *path* and return its AST as a dict.

    The file is parsed with the ``attachComment`` option, converted with
    ``esprima.toDict`` and passed through ``all_dockblocks_to_dict`` before
    being returned.
    """
    # The 'U' mode flag was removed in Python 3.11 (open() raises ValueError);
    # text mode already performs universal-newline translation.
    with open(path, 'r') as f:
        s = f.read()
    sast = esprima.parseScript(s, {'attachComment': True})
    dast = esprima.toDict(sast)
    # Return value is not used; presumably it rewrites dast in place.
    all_dockblocks_to_dict(dast)
    return dast
def create(self, request, *args, **kwargs):
    """Parse the submitted JavaScript and respond with its AST.

    The request data is validated through the view's serializer. On
    success the script is parsed with the submitted esprima options, a
    ScriptParse row is stored on a best-effort basis, and the AST is
    returned under ``out_ast``. On invalid data an error envelope is
    returned.
    """
    serializer = self.get_serializer(data=request.data)
    if not serializer.is_valid():
        # The original built this Response without returning it, so the
        # view returned None for invalid input.
        return Response({'status': 0, 'message': 'incorrect data', 'data': []})

    input_js = serializer.data.get('input_js')
    option = {
        'jsx': serializer.data.get('jsx'),
        'range': serializer.data.get('range'),
        'loc': serializer.data.get('loc'),
        'tolerant': serializer.data.get('tolerant'),
        'tokens': serializer.data.get('tokens'),
        'comment': serializer.data.get('comment')
    }
    output_ast = esprima.parseScript(input_js, option).toDict()

    try:
        ScriptParse.objects.create(input_js=input_js,
                                   output_ast=json.dumps(output_ast),
                                   jsx=option['jsx'],
                                   range=option['range'],
                                   loc=option['loc'],
                                   tolerant=option['tolerant'],
                                   tokens=option['tokens'],
                                   comment=option['comment'])
    except Exception as e:
        # Persisting the record is best effort; the AST is still returned.
        print(e)
    return Response({'out_ast': output_ast})
def validate(file_path):
    """
    Returns None if file is valid and returns an error message otherwise.
    A file is valid if
    (1) it contains only unicode text
    (2) it is parseable (i.e., it is structurally well-formed)
    (3) it does not contain engine-specific functions
    """
    with open(file_path) as source:
        try:
            # read() keeps the text exactly as stored. Joining readlines()
            # with '\n' doubled every line break, which shifted the line
            # numbers reported in parser errors.
            contents = source.read()
        except UnicodeDecodeError as e:
            # fuzzer can add really crazy characters
            return str(e)

    # in case of empty file
    if not contents:
        return "File is empty"

    # TODO see if enabling tolerant mode (parser will continue after encountering errors) is useful
    # for now, just return the error as a string
    try:
        ast = esprima.parseScript(contents)  # , options={'tolerant': True}
    except (esprima.Error, RecursionError) as e:
        return str(e)

    # user-defined validators: every module-level name starting with "check_"
    validators = [v for k, v in globals().items() if k.startswith("check_")]
    for v in validators:
        result = v(ast)
        if result:
            return result
    return None
def addVariableDeclarations(inputProgram):
    """Return the program with ``var`` declarations inserted before functions.

    For every top-level function declaration, the variables reported by
    findUndeclaredVariables() are declared (one ``var name;`` per line,
    followed by a newline) immediately before the function.
    """
    # parse Javascript code into abstract syntax tree:
    # NB: esprima: https://media.readthedocs.org/pdf/esprima/4.0/esprima.pdf
    ast = esprima.parseScript(inputProgram, {'range': True, 'tolerant': True})

    # Assemble the output from slices of the input interleaved with the new
    # declarations; top-level statements are visited in source order.
    pieces = []
    copiedUpTo = 0
    allUndeclaredVariables = []
    for statement in ast.body:
        if statement.type != 'FunctionDeclaration':
            continue
        # find all undeclared variables:
        undeclared = findUndeclaredVariables(statement.body.body, allUndeclaredVariables)
        # add declarations (var) just before the function:
        declarationLines = ['var ' + name + ';' for name in undeclared]
        functionStart = statement.range[0]
        pieces.append(inputProgram[copiedUpTo:functionStart])
        pieces.append('\n'.join(declarationLines) + '\n')
        copiedUpTo = functionStart
    pieces.append(inputProgram[copiedUpTo:])
    return ''.join(pieces)
def AnalisisLexico(codigo, rango=False):
    """Parse *codigo* tolerantly with token collection enabled.

    rango: passed through as esprima's ``range`` option.
    Returns whatever esprima.parseScript returns for these options.
    """
    parse_options = {'tokens': True, 'range': rango, 'tolerant': True}
    return esprima.parseScript(codigo, parse_options)
def _validate_and_parse_js_and_ts_files(self):
    """This function validates JavaScript and Typescript files and
    returns the parsed contents as a Python dictionary.

    Returns:
        dict. contains the contents of js and ts files after
        validating and parsing the files.

    Raises:
        Exception. A '.js' file could not be parsed, or another file could
        neither be parsed directly nor after being compiled.
    """
    # Select JS files which need to be checked.
    files_to_check = self.all_filepaths
    parsed_js_and_ts_files = dict()
    if not files_to_check:
        return parsed_js_and_ts_files

    compiled_js_dir = tempfile.mkdtemp(
        dir=os.getcwd(), prefix='tmpcompiledjs')
    # try/finally removes the temporary directory on every exit path,
    # including errors raised while reading files from the cache.
    try:
        if not self.verbose_mode_enabled:
            python_utils.PRINT('Validating and parsing JS and TS files ...')
        for filepath in files_to_check:
            if self.verbose_mode_enabled:
                python_utils.PRINT(
                    'Validating and parsing %s file ...' % filepath)
            file_content = FILE_CACHE.read(filepath)

            try:
                # Use esprima to parse a JS or TS file.
                parsed_js_and_ts_files[filepath] = esprima.parseScript(
                    file_content, comment=True)
            except Exception as e:
                # Compile typescript file which has syntax not valid for JS
                # file.
                if filepath.endswith('.js'):
                    raise Exception(e)
                try:
                    compiled_js_filepath = self._compile_ts_file(
                        filepath, compiled_js_dir)
                    file_content = FILE_CACHE.read(compiled_js_filepath)
                    parsed_js_and_ts_files[filepath] = esprima.parseScript(
                        file_content)
                except Exception as compile_error:
                    raise Exception(compile_error)
    finally:
        shutil.rmtree(compiled_js_dir)

    return parsed_js_and_ts_files
def __init__(self, filepath):
    """Read the UTF-8 file at *filepath* and parse it with esprima.

    Stores the raw text, the AST (parsed with ``range`` and ``tokens``
    enabled), the result of self.getStringLiterals(), and a ``transform``
    counter starting at 0.
    """
    with open(filepath, "r", encoding="utf-8") as source_file:
        source_text = source_file.read()

    self.rawCode = source_text
    self.ast = esprima.parseScript(source_text, range=True, tokens=True)
    self.stringLiterals = self.getStringLiterals()
    self.transform = 0
def do_handle(self, request):
    """Acknowledge a script request and answer event subscriptions.

    The request body is decoded as UTF-8 and parsed; the command is the
    value of the first argument of the call that initialises the first
    declaration. For ``networkEventSubscribe`` three ``imageChanged``
    scripts are sent, each after a one-second pause.
    """
    transaction = request['transaction']
    self.send_script(transaction, ACKNOWLEDGE)

    script = parseScript(request['body'].decode('utf-8'))
    first_declarator = script.body[0].declarations[0]
    command = first_declarator.init.arguments[0].value

    if command != 'networkEventSubscribe':
        return
    for _ in range(3):
        time.sleep(1.0)
        self.send_script(transaction, b'imageChanged\r{}')
def js_parse(text_or_node, module=False):
    """Parse JavaScript as a script, or as a module when *module* is true.

    The input is first converted with html_cast_text().

    Returns:
        ``(program, ())`` on success, or ``(None, [message])`` when esprima
        reports a parse error.
    """
    try:
        if module:
            js = esprima.parseModule(html_cast_text(text_or_node))
        else:
            js = esprima.parseScript(html_cast_text(text_or_node))
        assert js.type == 'Program'
    except esprima.error_handler.Error as e:
        return (None, [str(e)])
    return (js, tuple())
def minify(self, code, source="script"):
    """Parse *code*, compile the syntax tree and return the context's stream.

    Args:
        code: JavaScript source text.
        source: "script", "module", or "auto". With "auto" the code is
            parsed as a module first and as a script if that fails.

    Raises:
        ValueError: *source* is not one of the supported values.
    """
    options = dict(range=True, loc=True)

    # Parse code
    if source == "script":
        syntax = esprima.parseScript(code, options)
    elif source == "module":
        syntax = esprima.parseModule(code, options)
    elif source == "auto":
        try:
            syntax = esprima.parseModule(code, options)
        except Exception:
            # Not a bare except: KeyboardInterrupt/SystemExit must propagate.
            syntax = esprima.parseScript(code, options)
    else:
        raise ValueError(f"Invalid source type: {source}")

    # Compile syntax
    context = Context(self.config, source=code)
    context.build(syntax)

    # Return stream
    return context.stream()
def create(self, request, *args, **kwargs):
    """Generate new JavaScript programs from the submitted script.

    The request data is validated through the view's serializer. The input
    script is parsed, converted to a template, and up to ``ntime`` template
    snapshots are produced within ``second_limit`` seconds. Each snapshot
    is turned back into source with generate(); failures are printed and
    skipped. The result is returned under ``out_new_js``.

    An error envelope is returned when the data is invalid or when the
    template pickle files are missing.
    """
    serializer = self.get_serializer(data=request.data)
    if not serializer.is_valid():
        # The original built this Response without returning it, so the
        # view returned None for invalid input.
        return Response({'status': 0, 'message': 'incorrect data', 'data': []})

    input_js = serializer.data.get('input_js')
    ntime = serializer.data.get('ntime')
    output_ast = esprima.parseScript(input_js).toDict()
    # try:
    #     ScriptParse.objects.create(
    #         input_js=input_js, output_ast=json.dumps(output_ast), jsx=False,
    #         range=False, loc=False, tolerant=False, tokens=False, comment=False
    #     )
    # except Exception as e:
    #     print(e)
    jt = js_templates(reserved_words='./jsml/js_reserved_words.txt')
    if not os.path.isfile('./jsml/js_types_templates.pkl') or \
            not os.path.isfile('./jsml/js_program_templates.pkl'):
        return Response({
            'status': 0,
            'message': 'Doesn\'t exist DB for Machine learning',
            'data': []
        })

    t = jt.convert_template(output_ast)
    new_asts = []

    import time
    t_end = time.time() + serializer.data.get('second_limit')
    print(t_end)
    for i in range(ntime):
        print(time.time())
        print('ntime -- ', i)
        if time.time() > t_end:
            break
        # The first round uses the template as converted; later rounds use
        # a randomly generated program derived from it.
        seed = t if i == 0 else jt.generate_random_program(t)
        jt.generate_ast_template(seed)
        # Snapshot the template after this round.
        new_tt = deepcopy(t)
        print(new_tt)
        new_asts.append(new_tt)

    new_js = []
    for ast in new_asts:
        print(ast)
        try:
            new_js.append(generate(ast))
        except Exception as e:
            # Skip snapshots that cannot be turned back into source.
            print(e)
    print("success holding js.")
    return Response({'out_new_js': new_js})
def get_js(html):
    """Feed *html* to HTML_Parser and parse the data it collected as a script.

    Returns the body of the tolerantly parsed script, or False when reading
    the parser's ``data`` attribute raises.
    """
    html_parser = HTML_Parser()
    html_parser.feed(html)
    try:
        script_source = html_parser.data
    except Exception as exception:
        message = 'Exception {0}: {1}\n'.format(type(exception).__name__, exception)
        log.error(message)
        return False
    else:
        return esprima.parseScript(script_source, {"tolerant": True}).body
def extract_data(self, con_class):
    """Parse the JavaScript in ``self.input_file`` with *con_class* as delegate.

    Returns the parse result, or None after printing the error when reading
    or parsing fails.
    """
    try:
        # Read the file in one call instead of concatenating line by line.
        with open(self.input_file, 'r') as f:
            filecontents = f.read()
        data = parseScript(filecontents, delegate=con_class)
        print('Data has been extracted')
        return data
    except Exception as e:
        # Best effort: report the problem and signal failure with None.
        print('There was an error in the JavaScript file: ' + str(e))
        return None
def __init__(self):
    """Download ``self.url`` and load the gym list embedded in its last script.

    The last ``<script>`` element of the page is parsed with esprima; the
    object found at a fixed position inside it is converted with
    self.esprima_to_python() and its ``allGyms`` entry is stored in
    ``self.data``.
    """
    response = urlopen(self.url)
    try:
        doc = lxml.html.parse(response)
    finally:
        # Release the HTTP connection once the document has been parsed.
        response.close()
    root = doc.getroot()
    script_element = root.cssselect('script')[-1]
    tree = esprima.parseScript(script_element.text)
    # The lookups below depend on the exact shape of the page's script:
    # second argument of the first top-level call, then the third statement
    # of that function's body.
    dom_content_loaded = tree.body[0].expression.arguments[1]
    gym_list_object = dom_content_loaded.body.body[2] \
        .expression.arguments[0].arguments[1]
    self.data = self.esprima_to_python(gym_list_object)['allGyms']
def js_to_gast(js_input):
    """
    takes js string and converts it to a generic AST

    Returns None when the input cannot be parsed; otherwise the result of
    js_router.node_to_gast() for the parsed program.
    """
    try:
        input_ast = esprima.parseScript(js_input, {"tokens": False})
    except Exception:
        # this will signal to translate that error occurred
        # (not a bare except, so KeyboardInterrupt/SystemExit propagate)
        return None
    return js_router.node_to_gast(input_ast)
def js_to_gast(program):
    """Convert the top-level statements of a JS program into a generic AST.

    Expression statements and variable declarations are converted; every
    other statement type is skipped.
    """
    input_ast = esprima.parseScript(program, {"tokens": True})

    # TODO: can add more fields to the generic ast
    converted = []
    gast = {"type": "root", "body": converted}

    # NOTE: with current implementation, it will just go until it sees something it recognizes
    # eventually can implement nested structures
    for node in input_ast.body:
        if node.type == "ExpressionStatement":
            converted.append(jsexpr_to_gast(node.expression))
        elif node.type == "VariableDeclaration":
            converted.append(jsassign_to_gast(node.declarations))
    return gast
def checkJavaScriptSyntax(component, tab):
    """
    Checks each JS code component tabs for syntax errors.
    Note, catalogue message is formatted using a dict that contains:
        {
        'codeTab': The code component tab as string,
        'lineNumber': The line number and error msg as string
        }

    Parameters
    ----------
    component: Component
        The code component being tested
    tab: str
        The name of the code component tab being tested
    """
    try:
        parseScript(str(component.params[tab].val))
    except Exception as err:
        # Not every exception caught here has a `.message` attribute; fall
        # back to str(err) so the alert is still raised.
        strFields = {'codeTab': tab,
                     'lineNumber': getattr(err, 'message', str(err))}
        # Dont sent traceback because strFields gives better localisation of error
        alert(4210, component, strFields)
def js2py(data, ret='code', postprocess=None):
    """Parse JavaScript *data* with a MyVisitor delegate and return a result.

    ret selects what is returned:
        'code'    -- astor.to_source() of the tree's ``python_ast``
        'visitor' -- the visitor instance
        otherwise -- the tree's ``python_ast`` itself
    """
    lines = data.splitlines()
    visitor = MyVisitor()
    # The visitor is given the source split into lines before parsing starts.
    visitor.lines = lines
    tree = esprima.parseScript(data, {'tolerant': True}, delegate=visitor)
    if postprocess:
        # NOTE(review): the value returned by markup() is replaced on the
        # next line; confirm that post_process() belongs under this flag.
        tree = markup(tree)
        tree = post_process(visitor)
    if ret == 'code':
        return astor.to_source(tree.python_ast)
    elif ret == 'visitor':
        return visitor
    else:
        return tree.python_ast
def make_response(self, request, body=None):
    """Build the response payload for *request* based on its content type.

    Script requests have their body decoded as UTF-8 and syntax-checked;
    a parse error is logged and answered with ``b''``, otherwise *body*
    (or ``b'{}'``) is returned. Keep-alive and data requests get their
    fixed responses; anything else gets *body* or ``b''``.
    """
    content_type = request.get('content_type')
    if content_type in (ContentType.SCRIPT, ContentType.SCRIPT_SHARED):
        try:
            # Validation only: the parse result itself is not needed.
            parseScript(request['body'].decode('utf-8'))
        except ParseError as e:
            logger.exception('%s: %r', e, request['body'])
            return b''
        return body or b'{}'
    if content_type == ContentType.KEEP_ALIVE:
        return KEEP_ALIVE_RESPONSE
    if content_type == ContentType.DATA:
        return DATA_RESPONSE
    return body or b''
def main():
    """Parse js/snowplow.js and print each node matched for 'colorDepth'.

    For every match the visitor prints the node type and the type of its
    parent (None when the node has no parent attribute).
    """
    print("#" * 100)

    # Close the source file as soon as it has been read.
    with open('js/snowplow.js') as source_file:
        ast = esprima.parseScript(source_file.read())

    el = Element(ast)

    def parent_type(node):
        return getattr(getattr(node, 'parent', None), 'type', None)

    visitor = MatchPropertyVisitor(
        'colorDepth',
        lambda n: print("{}:{}".format(n.type, parent_type(n))))

    el.accept(visitor)
    el.walk()
def get_ast(script_addr, write_addr):
    """Parse the script at *script_addr* and store its AST under *write_addr*.

    The AST dict is written with utilities.write_dill_compressed() to
    ``<write_addr>/<script name>.json`` and the outcome ("Passed" or
    "Failed") is appended to ast_construction.log. Errors are printed, not
    raised.
    """
    # Last path component; used for the output name and the log entries.
    script_name = script_addr.rsplit('/', 1)[-1]
    try:
        print('Processing: ', script_name)
        source_text = utilities.read_full_file(script_addr)
        parsed = esprima.parseScript(source_text, options={'tolerant': True})
        ast_dict = parsed.toDict()
        target_path = os.path.join(write_addr, script_name + '.json')
        utilities.write_dill_compressed(target_path, ast_dict)
        utilities.append_file('ast_construction.log', script_name + ' Passed')
    except Exception as ex:
        print("Error while creating AST", str(ex))
        utilities.append_file('ast_construction.log', script_name + ' Failed')
def importData():
    """Load the API list and the JavaScript AST named on the command line.

    Expects exactly two command-line arguments: the path to a JSON API list
    and the path to a JavaScript file.

    Returns:
        tuple ``(ast, api_list)``: the esprima parse result and the decoded
        JSON.

    Raises:
        SystemExit: the argument count is wrong (a usage message is printed
        first).
    """
    if len(sys.argv) != 3:
        print(
            """Warning: invalid input type! This script does not use config.ini.
Syntax:
    $ python3.6 this_script.py <path/to/api_list.json> \
<path/to/javascript.js>
"""
        )
        # sys.exit() instead of exit(): the latter is injected by the site
        # module and is not guaranteed to exist.
        sys.exit()

    with open(sys.argv[1], encoding="utf-8") as data_file:
        api_list = json.loads(data_file.read())
    with open(sys.argv[2]) as js_file:
        ast = esprima.parseScript(js_file.read())
    return ast, api_list
def worker_process(input_file):
    """Count API symbol usage in one JavaScript file.

    Returns:
        ``(False, filename)`` when esprima cannot parse the file, otherwise
        ``(True, extended_symbol_counter)`` as produced by Element.walk().
    """
    filename = input_file.rsplit('/', 1)[-1]

    # Try getting the AST using esprima, bail if non-JS syntax
    try:
        with open(input_file) as source_file:
            source_text = source_file.read()
            ast = esprima.parseScript(source_text)
    except esprima.error_handler.Error:
        return False, filename

    # Create an element using that AST and register one visitor per symbol
    el = Element(ast)
    for entry in api_symbols:
        el.accept(MatchPropertyVisitor(entry))

    # Walk down the AST (breadth-first); only the extended counter is used
    _, extended_symbol_counter, _ = el.walk(api_symbols, filename)
    return True, extended_symbol_counter