Пример #1
0
def parse(expression, mode='eval'):
    """Parse JavaScript source and return part of the resulting tree.

    With ``mode='exec'`` the list returned by ``children()`` on the parsed
    tree is returned. With ``mode='eval'`` (the default) the ``expr``
    attribute of the first entry of that list is returned.

    Raises:
        TypeError: if ``mode`` is neither ``'eval'`` nor ``'exec'``.
    """
    # Reject unknown modes before any parsing work is done.
    if mode not in ('eval', 'exec'):
        raise TypeError("Only eval, exec modes allowed")

    statements = Parser().parse(expression).children()
    if mode == 'exec':
        return statements
    return statements[0].expr
Пример #2
0
    def case(self, case):
        """Assert that two separate Parser instances parse ``case`` equally."""
        # A fresh Parser is created for each parse, in the same order as
        # before: build, parse, build, parse.
        first_result = Parser().parse(case)
        second_result = Parser().parse(case)

        self.assertEqual(first_result, second_result)
Пример #3
0
def scan_js(crawler, url, content):
    '''
    Scan JavaScript source for assignments whose left side mentions 'url'
    and hand every string found on the right side to the crawler.

    ``content`` is parsed with slimit; ``url`` is only used for logging and
    as the first argument of ``crawler.check_link``.
    '''
    LOGGER.info('Scanning Javascript on %s' % url)

    def report(string_node):
        # The logged text drops the first and last character of the literal.
        LOGGER.info('Found interesting url in JS: %s' %
                    string_node.value[1:-1])
        # NOTE(review): the link passed on drops TWO leading characters
        # ([2:-1]) while the log line drops one -- confirm this is intended.
        crawler.check_link(url, string_node.value[2:-1])

    tree = Parser().parse(content)
    for node in nodevisitor.visit(tree):
        # Only assignments are of interest: <something>: <something>
        if not isinstance(node, ast.Assign):
            continue
        leftval = getattr(node.left, 'value', '')
        # Skip targets without a value or without 'url' in it.
        if not leftval or 'url' not in leftval:
            continue

        right = node.right
        if isinstance(right, ast.String):  # 'url': 'somestring'
            report(right)
        # <something> may be function_call() / variable + 'somestring':
        # look at the direct attributes of the right-hand node as well.
        for item in right.__dict__.values():
            if isinstance(item, ast.String):
                report(item)
Пример #4
0
def get_forecast(link):
    """Download a forecast page and collect the assignments in its script.

    The page at ``link`` is fetched, the ``<script>`` inside the element
    with id ``div_wgfcst1`` is parsed as JavaScript, and every assignment
    node is turned into a ``parse_key(left) -> value`` entry.

    Values come from ``parse_value``; assignments whose right side is an
    ``ast.Array`` are then re-read with ``parse_array`` and override the
    earlier entry for the same key.

    Returns:
        dict: the merged mapping described above.
    """
    from contextlib import closing

    # urllib2 responses are not context managers; closing() makes sure the
    # connection is released even when read() fails.
    with closing(urllib2.urlopen(link)) as response:
        html_doc = response.read()
    soup = BeautifulSoup(html_doc, "html.parser")

    forecast_text = soup.find("div", id="div_wgfcst1").find("script").string

    # Parse once and keep the assignment nodes; both passes below read the
    # same nodes, so a second parse of the same text is unnecessary.
    forecast_tree = Parser().parse(forecast_text)
    assignments = [
        node for node in nodevisitor.visit(forecast_tree)
        if isinstance(node, ast.Assign)
    ]

    full_data = {
        parse_key(node.left): parse_value(node.right)
        for node in assignments
    }

    # Array-valued assignments take precedence over the generic values.
    forecast = {
        parse_key(node.left): parse_array(node.right)
        for node in assignments
        if isinstance(node.right, ast.Array)
    }

    full_data.update(forecast)
    return full_data
Пример #5
0
def extract_g_config(script_text):
    """Return the ``g_config`` variable of a script converted to a map.

    The script is parsed and walked; for the first variable declaration
    named ``g_config`` the result of ``extract_object_as_map`` on its
    initializer is returned. ``None`` is returned when there is none.
    """
    tree = Parser().parse(script_text)
    declarations = (
        node for node in nodevisitor.visit(tree)
        if isinstance(node, ast.VarDecl)
    )
    for declaration in declarations:
        if declaration.identifier.value == 'g_config':
            return extract_object_as_map(declaration.initializer)
    return None
Пример #6
0
def position_info_from_naver(url):
    """Fetch a Naver land article page and extract its embedded page data.

    The article number is taken from the last path segment for
    ``m.land.naver.com`` URLs and from the ``articleNo`` query parameter
    for ``new.land.naver.com`` URLs. The mobile article page is then
    downloaded and each ``<script>`` containing
    ``land.articleDetail.jsonPageData`` is parsed and visited with
    ``NaverDataVisitor``.

    Returns:
        The ``res`` attribute of the visitor for the last matching script,
        or an empty dict when no script matches.

    Raises:
        Exception: if the URL host is neither of the two known hosts.
        KeyError: if a ``new.land.naver.com`` URL has no ``articleNo``.
    """
    parsed_obj = urlparse(url)
    if parsed_obj.netloc == 'm.land.naver.com':
        article_info_no = parsed_obj.path.split('/')[-1]
    elif parsed_obj.netloc == 'new.land.naver.com':
        article_info_no = dict(parse_qsl(parsed_obj.query))['articleNo']
    else:
        raise Exception('unknown url : ' + parsed_obj.netloc )

    r = requests.get('https://m.land.naver.com/article/info/' + str(article_info_no))
    soup = BeautifulSoup(r.content, "html.parser")
    res = dict()

    marker = 'land.articleDetail.jsonPageData'
    for sc in soup.findAll("script"):
        script_text = sc.text
        pos = script_text.find(marker)
        # str.find() returns -1 when absent; 0 is a valid hit at the very
        # start of the script and must not be skipped.
        if pos == -1:
            continue

        # The statement runs from the marker up to the next ';'. Without a
        # terminator take the rest of the script instead of slicing to -1,
        # which would silently drop the last character.
        statement_end = script_text.find(';', pos)
        if statement_end == -1:
            statement_end = len(script_text)
        js_obj_txt = script_text[pos:statement_end]

        tree = Parser().parse(js_obj_txt)
        visitor = NaverDataVisitor()
        visitor.visit(tree)
        res = visitor.res

    return res
Пример #7
0
def card_price_history(setname, cardname):
    '''
    Scrape the price history of a card from MTGPrice.com via a JS parser.

    Input:
        setname and cardname are strings; whitespace runs in each are
        replaced by underscores to build the page URL.
    Output:
        A numpy array built from [first item value, second item value]
        pairs read from the "data" assignment of the results script
        (per the original author: [timestamp, price] rows).
    '''
    # Build the card page URL and fetch it.
    set_slug = '_'.join(setname.split())
    card_slug = '_'.join(cardname.split())
    link = 'https://www.mtgprice.com/sets/' + set_slug + '/' + card_slug
    page = requests.get(link)
    soup = BeautifulSoup(page.content, 'html.parser')

    marker = 'var results = ['
    history = []
    for script in soup.findAll('script', type='text/javascript'):
        if marker not in script.text:
            continue
        tree = Parser().parse(script.text)
        for node in nodevisitor.visit(tree):
            if not isinstance(node, ast.Assign):
                continue
            if getattr(node.left, 'value', '') != "\"data\"":
                continue
            history.extend(
                [point.items[0].value, point.items[1].value]
                for point in node.right.items)
            # Only the first "data" assignment of a script is used.
            break
    return np.array(history)
Пример #8
0
 def test_func(self):
     """Mangle the parsed input and compare its ECMA output to the expectation.

     ``input`` and ``expected`` are not defined in this function; presumably
     they are supplied by an enclosing scope -- verify against the caller.
     """
     tree = Parser().parse(input)
     mangle(tree, toplevel=True)
     # Both sides are dedented and stripped before the comparison.
     actual = textwrap.dedent(tree.to_ecma()).strip()
     wanted = textwrap.dedent(expected).strip()
     self.assertMultiLineEqual(actual, wanted)
Пример #9
0
    def parse_country(self, response):
        """Extract a country's case chart and write it to ``data/<name>.csv``.

        The first element matching ``row graph_row`` supplies the chart
        script and title. The country name is the part of the title after
        " in " (a leading "the " is dropped). The script is parsed as
        JavaScript; the first non-empty ``categories`` assignment provides
        the labels and the first non-empty ``data`` assignment the integer
        values. One ``label,value`` line is written per pair.

        Raises:
            ValueError: if the title does not contain " in ".
            AssertionError: if either series is missing or their lengths
                differ.
        """
        charts = response.xpath('//*[@class="row graph_row"]')
        total_corona_chart = charts[0]
        script = total_corona_chart.xpath('div/script/text()').extract()[0]
        title = total_corona_chart.xpath('div/h3/text()').extract()[0]
        try:
            country_name = title[title.index(" in ")+4:]
        except ValueError:
            # str.index raises ValueError when " in " is absent. The former
            # "except e:" referenced an undefined name and would itself
            # fail with NameError instead of reaching this message.
            raise ValueError("Worldometer changed their labels.\
                              Hold your pain, Harold.")
        if country_name[:4] == "the ":
            country_name = country_name[4:]

        parser = Parser()
        tree = parser.parse(script)
        dates = None  # chart labels
        cases = None  # corresponding number of cases
        for node in nodevisitor.visit(tree):
            if not isinstance(node, ast.Assign):
                continue
            name = getattr(node.left, 'value', '')
            items = getattr(node.right, 'items', '')
            if name == 'categories' and not dates:
                print("\nparsing dates\n")
                # SECURITY: eval() runs text scraped from a remote page.
                # It is kept to preserve behaviour, but a literal-only
                # parser should replace it once the value format is
                # confirmed.
                dates = [eval(getattr(s, 'value', '')) for s in items]
            elif name == 'data' and not cases:
                print("\nparsing number of cases\n")
                cases = [int(getattr(n, 'value', '')) for n in items]
        assert dates and cases and len(dates) == len(cases)
        with open("data/%s.csv" % country_name, 'w+') as f:
            for date, count in zip(dates, cases):
                f.write(date + ',' + str(count) + '\n')
Пример #10
0
def analyzeJSCodesFinerBlock(script, display=False):
    """Parse a script and return the visitor's first-level sequences.

    The script is stripped, an enclosing ``<!-- ... -->`` wrapper is
    removed, and the parsed tree is walked with ``MyVisitor(display)``.
    For every entry of ``visitor.scripts`` an ``AST_TIME`` line is printed
    with the elapsed time apportioned by that entry's length.

    Returns:
        ``(visitor.first_level_seq, visitor.scripts)`` on success, or
        ``(None, None)`` when the two lists differ in length or any
        exception occurs (a message is written to stderr in both cases).
    """
    try:
        t1 = time.time()
        parser = Parser()
        script = script.strip()
        # Drop an HTML comment wrapper around the script body.
        if script.startswith('<!--') and script.endswith('-->'):
            script = script[4:-3]
        tree = parser.parse(script)
        visitor = MyVisitor(display)
        visitor.visit(tree, 0)
        if len(visitor.first_level_seq) != len(visitor.scripts):
            # sys.stderr.write works on Python 2 and 3 alike.
            sys.stderr.write(
                "error parsing script: scripts and seqs length inconsistent "
                + script[:100] + "\n")
            return None, None
        total_time = time.time() - t1
        total_len = float(len(script))
        try:
            portion = [len(x) / total_len for x in visitor.scripts]
            for share, chunk in zip(portion, visitor.scripts):
                print("AST_TIME: %f %d" % (total_time * share, len(chunk)))
        except Exception:
            # Timing output is best-effort diagnostics (an empty script
            # gives a zero division, for instance) and must not discard
            # the parse result. Unlike a bare except, KeyboardInterrupt
            # and SystemExit still propagate.
            pass
        return visitor.first_level_seq, visitor.scripts
    except Exception as e:
        sys.stderr.write(
            "error parsing script: " + str(e) + " || [START]"
            + script[:100] + "[END]\n")
        return None, None
Пример #11
0
def get_property_attributes(url):
    """Scrape the JavaScript assignment values from a property ad page.

    Returns ``None`` when the page title contains
    'Real Estate Properties' (the ad link did not resolve to a valid
    result). Otherwise the fourth ``text/javascript`` script of the page is
    parsed and every assignment becomes a ``left value -> right value``
    entry (missing values become ``''``). A ``'"date sold"'`` entry is
    added: a dummy date for archived ads, else the first
    ``DD Mon YYYY`` match in the title.

    Raises:
        IndexError: if a valid ad page has fewer than four scripts or its
            title carries no date.
    """
    response = requests.get(url)

    # html parser
    soup = BeautifulSoup(response.text, 'html.parser')

    # If the ad link does not return a valid search result, skip it.
    if 'Real Estate Properties' in soup.title.string:
        return None

    # If the ad is archived put in a dummy date, else get the real date.
    if soup.find("span", "status-label label-archive") is not None:
        date = '31 Dec 9999'
    else:
        # get date from title of advertisement
        date = re.findall(r'\d{2}\s\w{3}\s\d{4}', soup.title.string)[0]

    # The script is looked up only for valid ads, so pages that are about
    # to be skipped cannot fail on a missing fourth script.
    script = soup.findAll('script', {'type': 'text/javascript'})[3]

    # javascript parser
    tree = Parser().parse(script.text)
    fields = {
        getattr(node.left, 'value', ''): getattr(node.right, 'value', '')
        for node in nodevisitor.visit(tree)
        if isinstance(node, ast.Assign)
    }
    fields.update({'"date sold"': '"' + date + '"'})
    return fields
Пример #12
0
def chapter_url2image_urls(chapter_url):
    """Return the image URLs of a chapter page, ordered by page.

    ``get_info_from_url`` yields entries whose first element is a 3-tuple
    ending in JavaScript source. Each source is parsed and walked until an
    identifier named ``image_list`` is met; the node visited just before
    it is remembered. The remembered node of the LAST entry is rendered
    with ``to_ecma()``, matched against ``_image_list_match_pattern`` and
    decoded into a mapping whose values carry ``page`` and a
    base64-encoded ``src``.

    Returns:
        tuple: decoded ``src`` strings sorted by ``(page, src)``.

    Raises:
        AssertionError: if no node was remembered (including when there
            are no entries at all) or the pattern does not match.
    """
    g = get_info_from_url(chapter_url, chapter2images)

    p = Parser()
    # Bound before the loop so that an empty ``g`` fails the assertion
    # below instead of raising NameError.
    pre = None
    for (_tag, _, data), (_slot_idx, _pattern_idx, _info_pattern) in g:
        tree = p.parse(data)
        pre = None
        for node in nodevisitor.visit(tree):
            if isinstance(node, ast.Identifier) and node.value == 'image_list':
                break
            pre = node

    assert pre is not None
    m = _image_list_match_pattern.match(pre.to_ecma())
    assert m is not None
    # SECURITY: both eval() calls execute text scraped from a remote page.
    # They are kept to preserve behaviour; replace them with a literal-only
    # parser once the payload format is confirmed.
    image_list = eval(m.group(1))
    image_list = eval(image_list)

    pages = sorted(
        (info['page'], base64.b64decode(info['src']).decode('ascii'))
        for info in image_list.values())
    return tuple(src for _, src in pages)
Пример #13
0
 def assertUnusedObjects(self, source, expected):
     """Run UnusedObjectsVisitor on ``source`` and compare the ECMA output.

     ``source`` is parsed, the visitor's ``do`` is applied to the tree, and
     ``tree.to_ecma()`` is compared with ``expected``.
     """
     tree = Parser().parse(source)
     UnusedObjectsVisitor().do(tree)
     # Show the complete diff on failure.
     self.maxDiff = None
     self.assertSequenceEqual(tree.to_ecma(), expected)
Пример #14
0
def parse_global_js_for_access_id_action_url(global_js):
    """Assemble a URL from four assignments found in ``global_js``.

    The script is parsed and every assignment whose left value (quotes
    stripped) is one of ``protocol``, ``roDomain``, ``ro`` or ``rt`` has
    its right value (quotes stripped) collected under that name.

    Returns:
        str: the first collected value of each part, concatenated in the
        order protocol, roDomain, ro, rt.

    Raises:
        AssertionError: if the same value is assigned to a part twice.
        IndexError: if any of the four parts was never assigned.
    """
    tree = Parser().parse(global_js)

    parts = ['protocol', 'roDomain', 'ro', 'rt']
    UrlParts = namedtuple('UrlParts', parts)
    url_parts = UrlParts([], [], [], [])

    getvalue = operator.attrgetter('value')
    err = "Too many '{}' assignments in global.js."
    for node in nodevisitor.visit(tree):
        if not isinstance(node, ast.Assign):
            continue
        try:
            left_value = getvalue(node.left).strip('\'"')
        except AttributeError:
            # Assignment targets without a ``value`` are not of interest.
            continue

        if left_value not in parts:
            continue

        right_value = getvalue(node.right).strip('\'"')
        collected = getattr(url_parts, left_value)
        # Report the part that is actually duplicated; the message used to
        # name 'protocol' regardless of the offending key.
        assert right_value not in collected, err.format(left_value)
        collected.append(right_value)

    return url_parts.protocol[0] + url_parts.roDomain[0] + url_parts.ro[
        0] + url_parts.rt[0]
Пример #15
0
def minify(text, mangle=False):
    """Return the result of ``ECMAMinifier().visit`` on the parsed ``text``.

    When ``mangle`` is true, ``mangler.mangle`` is applied to the tree
    before the minifier visits it.
    """
    tree = Parser().parse(text)
    if mangle:
        mangler.mangle(tree)
    return ECMAMinifier().visit(tree)
Пример #16
0
def inital_check_for_obfuscation_condtiion_sensitiveFunctions(js_text):
    """Scan JavaScript for ``if`` statements and sensitive function names.

    Every node of the parsed tree is visited. ``If`` nodes set the
    condition flag. For other nodes the values of ``Identifier`` nodes are
    collected, descending through ``DotAccessor`` nodes (e.g.
    ``a.b.getStringfromChar``) to reach the identifiers inside them. The
    collected names are then compared with ``obfuscation_function_names``
    and ``profiling_function_names``.

    Returns:
        tuple: ``(if_condition, obfuscation, profiling, ob_list,
        pro_list)`` where the two lists are sets of matched names and the
        two flags tell whether the respective set is non-empty.
    """
    tree = Parser().parse(js_text)

    keywords = set()
    if_condition = False

    for visited in nodevisitor.visit(tree):
        if isinstance(visited, If):
            if_condition = True
            continue

        # Depth-first walk (LIFO stack) that only descends through dot
        # accessors; the visited node keeps its own name so the outer loop
        # variable is not rebound.
        pending = [visited]
        while pending:
            current = pending.pop()
            if isinstance(current, DotAccessor):
                try:
                    pending.extend(current.children())
                except Exception:
                    # Best effort: a node whose children cannot be listed
                    # is skipped. Unlike a bare except, KeyboardInterrupt
                    # and SystemExit still propagate.
                    pass
                continue

            if isinstance(current, Identifier):
                keywords.add(current.value)

    ob_list = {ob for ob in obfuscation_function_names if ob in keywords}
    pro_list = {pro for pro in profiling_function_names if pro in keywords}

    obfuscation = bool(ob_list)
    profiling = bool(pro_list)

    return if_condition, obfuscation, profiling, ob_list, pro_list
Пример #17
0
 def assertFoldingObjects(self, source, expected):
     """Run FoldingVisitor on ``source`` and compare the ECMA output.

     The rendered tree is also printed before the comparison.
     """
     tree = Parser().parse(source)
     folder = foldingvisitor.FoldingVisitor()
     folder.do(tree)
     print(tree.to_ecma())
     # Show the complete diff on failure.
     self.maxDiff = None
     self.assertSequenceEqual(tree.to_ecma(), expected)
Пример #18
0
def invJSToZ3(inv, typeEnv):
    """Convert a JavaScript invariant into the result of ``jsToZ3Expr``.

    ``inv`` must parse to a ``jsast.Program`` with exactly one child that
    is a ``jsast.ExprStatement``; its expression and ``typeEnv`` are passed
    to ``jsToZ3Expr``.

    Raises:
        AssertionError: if the parsed tree does not have that shape.
    """
    program = Parser().parse(inv)

    assert (isinstance(program, jsast.Program))
    assert (len(program.children()) == 1)
    statement = program.children()[0]
    assert (isinstance(statement, jsast.ExprStatement))
    return jsToZ3Expr(statement.expr, typeEnv)
Пример #19
0
def get_m_decl(fil):
  """Return the ECMA text of the initializer of variable ``m`` in a file.

  The UTF-8 file ``fil`` is parsed as JavaScript. When several variable
  declarations are named ``m`` the last one wins; ``None`` is returned
  when there is none.
  """
  with codecs.open(fil, 'r', encoding='utf8') as fd:
    source = fd.read()

  found = None
  for node in nodevisitor.visit(Parser().parse(source)):
    if not isinstance(node, ast.VarDecl):
      continue
    if node.identifier.value == 'm':
      found = node.initializer.to_ecma()
  return found
Пример #20
0
 def test_throw_statement(self):
     """Parsing ``throw`` followed by a line break must raise SyntaxError."""
     # expression is not optional in throw statement
     source = textwrap.dedent("""
     throw
       'exc';
     """)
     parser = Parser()
     # ASI at lexer level should insert ';' after throw
     with self.assertRaises(SyntaxError):
         parser.parse(source)
Пример #21
0
 def test_bug_no_semicolon_at_the_end_of_block_plus_newline_at_eof(self):
     """A function block followed by a newline at EOF parses to a non-empty tree."""
     # https://github.com/rspivak/slimit/issues/3
     source = textwrap.dedent("""
     function add(x, y) {
       return x + y;
     }
     """)
     tree = Parser().parse(source)
     top_level = tree.children()
     self.assertTrue(bool(top_level))
Пример #22
0
def parse_script(data):
    """Return every variable-declaration node found in the script ``data``.

    Before parsing, the text ``BuildGameRow(game, )`` is replaced by
    ``BuildGameRow(game, 0)``.
    """
    # Hack. Fix javascript syntax issue in steam's response
    data = data.replace('BuildGameRow(game, )', 'BuildGameRow(game, 0)')
    tree = Parser().parse(data)

    variables = []
    for node in nodevisitor.visit(tree):
        if isinstance(node, ast.VarDecl):
            variables.append(node)
    return variables
Пример #23
0
        def get_embedded_json():
            """Collect the assignments made inside ``$rwidgets(...)`` calls.

            Inline scripts (no ``src``) under the ``div`` with id ``JSDF``
            that mention ``$rwidgets`` are parsed. For each call to
            ``$rwidgets`` every assignment below it becomes a key/value
            pair with double quotes stripped. Keys listed in
            ``duplicates`` accumulate their values in a list; other keys
            keep the last value, and are skipped when it is ``'null'``.

            ``soup``, ``duplicates`` and ``silence_output`` come from the
            enclosing scope.
            """
            def strip(s, c):
                # Strip c from s when s is a str; other values pass through.
                return s.strip(c) if isinstance(s, str) else s

            div = soup.find('div', id='JSDF')
            scripts = div.find_all('script', src=None)

            # Look for $rwidgets
            script_texts = [
                s
                for script in scripts
                for s in script.contents
                if '$rwidgets' in s
            ]

            # Bodge until we get rid of slimit
            with silence_output():
                parser = Parser()

            raw_values = {}
            for script_text in script_texts:
                tree = parser.parse(script_text)
                for node in nodevisitor.visit(tree):
                    is_rwidgets_call = (
                        isinstance(node, ast.FunctionCall)
                        and isinstance(node.identifier, ast.Identifier)
                        and node.identifier.value == '$rwidgets')
                    if not is_rwidgets_call:
                        continue

                    # Gather the assignments below this call.
                    fields = {}
                    for n in nodevisitor.visit(node):
                        if not isinstance(n, ast.Assign):
                            continue
                        k = getattr(n.left, 'value', '').strip('"')
                        v = strip(getattr(n.right, 'value', ''), '"')
                        if k in duplicates:
                            fields.setdefault(k, []).append(v)
                        else:
                            fields[k] = v

                    # Merge fields and raw_values, resolving duplicates
                    for (k, v) in fields.items():
                        if k in duplicates:
                            if k in raw_values:
                                raw_values[k] += v
                            else:
                                raw_values[k] = v
                        elif v != 'null':
                            raw_values[k] = v
            return raw_values
Пример #24
0
 def _test_function_expression(self):
     """Parse an anonymous function placed inside an ``if`` block.

     Nothing is asserted; the call only has to complete.
     """
     source = """
     if (true) {
       function() {
         foo;
         location = 'http://anywhere.com';
       }
     }
     """
     Parser().parse(source)
Пример #25
0
def parse(text):
    """
    Turn a JavaScript source string into a source tree using the Parser
    provided by the slimit.parser module.

    The Parser instance is created on first use and kept in the
    module-level ``_parser`` for later calls.
    """

    global _parser
    parser = _parser
    if parser is None:
        parser = _parser = Parser()

    return parser.parse(text)
Пример #26
0
def addAllIntEnv(inv, env=None):
    """Map every identifier appearing in ``inv`` to ``Int``.

    Args:
        inv: JavaScript source to parse.
        env: optional mapping to update in place; a new dict is created
            when it is omitted.

    Returns:
        The mapping, with ``env[name] = Int`` set for each identifier
        value found in the parsed tree.
    """
    # Identity test: ``== None`` would invoke a custom __eq__ on the
    # argument; only the "no mapping supplied" case should create a dict.
    if env is None:
        env = {}

    tree = Parser().parse(inv)
    for node in nodevisitor.visit(tree):
        if isinstance(node, jsast.Identifier):
            env[node.value] = Int

    return env
Пример #27
0
def extract_strings_slimit(javascript: str) -> List[str]:
    """Return the contents of every string literal in ``javascript``.

    The source is parsed with slimit and the tree is walked depth-first;
    for each ``ast.String`` node the value without its first and last
    character is collected, in visiting order.

    Unless ``SHOW_WARNINGS`` is set, file descriptor 2 is pointed at the
    null device while the Parser is constructed.

    Raises:
        TypeError: if the tree contains an item that is neither ``None``,
            an ``ast.Node``, a list nor a tuple.
    """
    from slimit.parser import Parser
    from slimit import ast

    if SHOW_WARNINGS:
        parser = Parser()
    else:
        # File descriptor hackiness to silence warnings
        devnull_fd = os.open(os.devnull, os.O_RDWR)
        saved_stderr_fd = os.dup(2)
        try:
            os.dup2(devnull_fd, 2)
            parser = Parser()
        finally:
            # Restore the saved descriptor, then release both helpers.
            os.dup2(saved_stderr_fd, 2)
            os.close(devnull_fd)
            os.close(saved_stderr_fd)

    # Hack to work around https://github.com/rspivak/slimit/issues/52
    KEYWORDS = r"(?:catch|delete|return|throw)"
    javascript = re.sub(rf"(\.\s*{KEYWORDS})\b", r"\1_", javascript)
    javascript = re.sub(rf"\b({KEYWORDS})(\s*:)", r"'\1'\2", javascript)

    found = []

    def collect(item):
        if item is None:
            return
        if isinstance(item, ast.Node):
            if isinstance(item, ast.String):
                found.append(item.value[1:-1])
            children = item.children()
        elif isinstance(item, (list, tuple)):
            children = item
        else:
            raise TypeError("Unexpected item: {!r}".format(item))
        for child in children:
            collect(child)

    collect(parser.parse(javascript))
    return found
Пример #28
0
def analyzeJSCodes(script, display=False):
    """Parse a script and return the visitor's node order list.

    The parsed tree is walked with ``MyVisitor(display)``.

    Returns:
        ``visitor.node_order_list`` on success; ``None`` when any
        exception occurs (a message with the error and the script is
        written to stderr).
    """
    try:
        tree = Parser().parse(script)
        visitor = MyVisitor(display)
        visitor.visit(tree, 0)
        return visitor.node_order_list
    except Exception as e:
        # sys.stderr.write works on Python 2 and 3 alike, unlike the
        # ``print >> sys.stderr`` statement.
        sys.stderr.write(
            "error parsing script: " + str(e) + " || " + script + "\n")
        return None
Пример #29
0
def main():
    """Read the file named on the command line and print its reduced form.

    The source is parsed, passed through
    ``ConstantReductionVisitor(args.debug)``, and the ECMA rendering of the
    tree returned by the visitor is printed.
    """
    args = parse_args()

    with open(args.filename) as f:
        source = f.read()

    tree = Parser().parse(source)

    visitor = ConstantReductionVisitor(args.debug)
    tree = visitor.visit(tree)
    # The parenthesised single-argument form prints the same text on
    # Python 2 and Python 3.
    print(tree.to_ecma())
Пример #30
0
def parse_JavaScript(js):
    """Record the function declarations in ``js`` that match a fixed shape.

    A function declaration is recorded when it has more than one
    parameter, its first parameter is ``this$static``, its last parameter
    is ``callback`` and its ECMA text contains ``createStreamWriter``.
    For each match a dict is appended to ``extracted`` with:

    * ``function``: the declared name without ``$`` and ``_<digits>``
      parts,
    * ``num_of_args``: the parameter count minus the two fixed ones,
    * ``args``: the values of the remaining parameters,
    * ``arg_type_data``: ``get_param_types(function, <ECMA text>)``.
    """
    global functions
    tree = Parser().parse(js)

    for node in nodevisitor.visit(tree):
        if not isinstance(node, ast.FuncDecl):
            continue

        parameters = node.parameters
        if len(parameters) <= 1:
            continue

        # The first parameter must be the receiver and the last one the
        # callback.
        if parameters[0].value != "this$static":
            continue
        if parameters[-1].value != "callback":
            continue

        # the function will call createStreamWriter if its used in the
        # client interface
        source = node.to_ecma()
        if "createStreamWriter" not in source:
            continue

        # -2 for the 'this' and callback; with exactly two parameters this
        # is 0 and the list below is empty.
        num_of_params = len(parameters) - 2
        # we just keep the arguments we will need to make in the GWT request
        params = [
            param.value for param in parameters
            if param.value != "this$static" and param.value != "callback"
        ]

        # strip the correct function name (raw string: '\d' in a plain
        # literal is an invalid escape sequence)
        function = node.identifier.value.replace("$", "")
        function = re.sub(r'_\d+', '', function)

        # append to a list, since we may have functions of the same name,
        # but different signatures
        extracted.append({
            "function": function,
            "num_of_args": num_of_params,
            "args": params,
            "arg_type_data": get_param_types(function, source),
        })