예제 #1
0
def parse_edb(raw_content, report_link):
    """Extract title, CVE id and raw exploit text from an exploit report page.

    Args:
        raw_content: Markup of the report page, parsed with
            ``html.fromstring``.
        report_link: URL of the report; its ``/exploits/`` segment is
            rewritten to ``/raw/`` to download the exploit body.

    Returns:
        A dict with keys ``cve_id`` (list with zero or one ``CVE-...``
        string), ``title`` and ``content``.  ``{}`` is returned when the
        title node is missing or when ``crawl_content_from_link`` returns
        ``None`` for the raw link.
    """
    tree = html.fromstring(raw_content)
    keyword_section = tree.xpath('/html/body/div/div[2]/div[2]/div/div/div[1]/div/div[1]/h1/text()')
    if not keyword_section:
        print('ERROR in parse_edb')
        return {}

    title = keyword_section[0].replace('\n', ' ')
    cve_line = tree.xpath('/html/body/div/div[2]/div[2]/div/div/div[1]/div/div[2]/div[1]/div[1]/div/div[1]/div/div/div/div[2]/h6/a/text()')

    link = report_link.replace('/exploits/', '/raw/')
    page = requests.get(link, timeout=60, headers={'User-Agent': "Magic Browser"})

    # The crawl result only acts as an availability gate: the content that
    # is returned comes from the HTTP response fetched above.
    # NOTE(review): the link is fetched twice; confirm which source is the
    # intended one before removing either request.
    if crawl_content_from_link(link) is None:
        return {}

    str_content = str(page.content).replace('\r\n', ' ')

    cve_id_list = []
    if cve_line:
        cve_number = encode_content(cve_line[0])
        # Accept only a non-empty "digits and dashes" value.  Anything else
        # used to be reported as the meaningless id 'CVE-'; now no id is
        # reported at all.
        if cve_number and all(ch.isdigit() or ch == '-' for ch in cve_number):
            cve_id_list.append('CVE-' + cve_number)

    return {
        'cve_id': cve_id_list,
        'title': encode_content(title),
        'content': encode_content(str_content)
    }
def parse_securitytracker(raw_content, clean_content):
    """Extract title, CVE id and version text from a SecurityTracker page.

    Args:
        raw_content: Page markup; converted with ``str()`` and searched for
            the ``<title>`` element.
        clean_content: Text searched for the ``CVE Reference:``,
            ``Version(s):`` and ``Description:`` labels (presumably the
            tag-stripped text of the same page; verify against the caller).

    Returns:
        A dict with keys ``cve_id`` (list holding at most one id),
        ``title`` and ``content`` (the text between ``Version(s):`` and
        ``Description:``).  ``{}`` is returned, after printing an error,
        when no usable title or no version section is found.
    """
    open_tag = '<title>'
    # The last 18 characters of the title are dropped -- presumably a fixed
    # site-name suffix; verify against live pages.
    suffix_len = 18

    content = str(raw_content)
    keyword_section = ''
    tag_loc = content.find(open_tag)
    if tag_loc != -1:
        title_start = tag_loc + len(open_tag)
        close_loc = content.find('</title>', title_start)
        if close_loc != -1:
            # Clamp the end so a short title cannot produce a negative
            # index that would wrap around to the end of the document.
            title_end = max(close_loc - suffix_len, title_start)
            keyword_section = content[title_start:title_end].replace('\n', ' ')

    if keyword_section:
        cve_id_list = []
        start_loc = clean_content.find('CVE Reference:')
        end_loc = clean_content.find('(Links to External Site)')
        if start_loc != -1 and start_loc < end_loc:
            matched_cve = regex_cve(clean_content[start_loc:end_loc])
            if len(matched_cve) > 0:
                cve_id_list.append(matched_cve[0])

        start_loc = clean_content.find('Version(s):')
        end_loc = clean_content.find('Description:')
        if start_loc != -1 and start_loc < end_loc:
            str_content = clean_content[start_loc + len('Version(s):'):end_loc]
            return {
                'cve_id': cve_id_list,
                'title': encode_content(keyword_section),
                'content': encode_content(str_content)
            }

    print('ERROR in parse_securitytracker')
    return {}
def parse_securityfocus_forum(raw_content, clean_content):
    """Extract title, CVE ids and message text from a forum-style page.

    Args:
        raw_content: Page markup, parsed with ``html.fromstring``.
        clean_content: Passed together with the title to
            ``get_cve_id_list`` to collect CVE ids.

    Returns:
        A dict with keys ``cve_id``, ``title`` and ``content``, or ``{}``
        (after printing an error) when the ``#comments`` link text that
        serves as the title is absent.
    """
    document = html.fromstring(raw_content)
    heading_texts = document.xpath('//*[@id="comments"]/div/a/text()')
    if len(heading_texts) == 0:
        print('ERROR in parse_securityfocus_forum')
        return {}

    title = heading_texts[0].replace('\n', ' ')
    # Concatenate every text node of the comment body, with newlines
    # flattened to spaces.
    body_texts = document.xpath('//*[@id="comments"]/div/div/text()')
    str_content = ''.join(piece.replace('\n', ' ') for piece in body_texts)

    return {
        'cve_id': get_cve_id_list(title, clean_content),
        'title': encode_content(title),
        'content': encode_content(str_content)
    }
def parse_openwall(raw_content, clean_content):
    """Extract subject, CVE ids and message text from a ``<pre>``-based page.

    Args:
        raw_content: Page markup, parsed with ``html.fromstring``.
        clean_content: Passed together with the title to
            ``get_cve_id_list`` to collect CVE ids.

    Returns:
        A dict with keys ``cve_id``, ``title`` and ``content``, or ``{}``
        (after printing an error) when the page has no ``<pre>`` text or
        its first text node contains no ``Subject:`` label.
    """
    document = html.fromstring(raw_content)
    pre_chunks = document.xpath('/html/body/pre/text()')

    subject_at = pre_chunks[0].find('Subject:') if len(pre_chunks) >= 1 else -1
    if subject_at == -1:
        print('ERROR in parse_openwall')
        return {}

    header_text = pre_chunks[0]
    # The title runs from the "Subject:" label (label included) up to the
    # first blank line after it.  If there is no blank line, find() yields
    # -1 and the slice stops one character before the end of the text.
    blank_at = header_text.find('\n\n', subject_at)
    title = header_text[subject_at:blank_at].replace('\n', ' ')

    # Flatten all <pre> text nodes into one line; ">\n" is replaced first
    # so that it collapses to a single space.
    str_content = ''.join(
        chunk.replace('>\n', ' ').replace('\n', ' ') for chunk in pre_chunks)

    return {
        'cve_id': get_cve_id_list(title, clean_content),
        'title': encode_content(title),
        'content': encode_content(str_content)
    }
def parse_securityfocus_official(raw_content):
    """Extract title, CVE ids and text from a page with a ``#vulnerability`` block.

    Args:
        raw_content: Page markup, parsed with ``html.fromstring``.

    Returns:
        A dict with keys ``cve_id`` (first ``regex_cve`` match of each text
        node in table row 3, column 2), ``title`` (first text node under
        the ``span``) and ``content`` (text of table row 9, column 2), or
        ``{}`` (after printing an error) when the title span has no text.
    """
    document = html.fromstring(raw_content)
    heading_texts = document.xpath('//*[@id="vulnerability"]/span//text()')
    if len(heading_texts) == 0:
        print('ERROR in parse_securityfocus_official')
        return {}

    title = heading_texts[0]
    str_content = ''.join(
        document.xpath('//*[@id="vulnerability"]/table/tr[9]/td[2]/text()'))

    cve_cells = document.xpath(
        '//*[@id="vulnerability"]/table/tr[3]/td[2]/text()')
    # Keep only the first match found in each text node.
    matches_per_cell = [regex_cve(cell) for cell in cve_cells]
    cve_id_list = [found[0] for found in matches_per_cell if len(found) > 0]

    return {
        'cve_id': cve_id_list,
        'title': encode_content(title),
        'content': encode_content(str_content)
    }