示例#1
0
 def analysis_rss(self, list_obj):
     """Push every not-yet-seen RSS entry of an intelligence feed to Redis.

     Only acts when ``self.lclass`` is ``"intelligence"``. Each element of
     ``self.content`` is turned into a record using the extraction rules in
     ``list_obj['response']`` (applied through ``self.get_value``) and, when
     ``self.unique_url`` accepts its ``raw_url``, JSON-encoded onto the
     ``result`` list together with an ``rhash`` derived from the URL.

     Args:
         list_obj: Source configuration; ``list_obj['response']`` must hold
             the rules for ``title``, ``summary``, ``publish_time``,
             ``source`` and ``raw_url``.
     """
     logging.info('-- analysis rss --')
     if self.lclass != "intelligence":
         return

     for entry in self.content:
         rules = list_obj['response']
         title = self.get_value(entry, rules['title']).strip()
         summary = self.get_value(entry, rules['summary'])
         # The extracted value is handed straight to time.strftime, so it is
         # presumably a struct_time / time tuple - TODO confirm.
         published = time.strftime(
             "%Y-%m-%d %H:%M", self.get_value(entry, rules['publish_time']))
         source = self.get_value(entry, rules['source'])
         raw_url = self.get_value(entry, rules['raw_url'])

         record = {
             "class": self.lclass,
             "title": title,
             "summary": summary,
             "publish_time": published,
             "source": source,
             "raw_url": raw_url,
         }
         digest = str(md5(raw_url))
         if not self.unique_url(raw_url):
             logging.info('-- exist url %s --' % raw_url)
             continue
         record["rhash"] = digest
         redis_c.lpush('result', json.dumps(record))
         logging.info('-- push url %s --' % raw_url)
示例#2
0
    async def analysis(self, list_obj):
        """Collect event detail-page URLs from a list page and queue them.

        Parses ``self.content`` as HTML, selects the list entries described
        by ``list_obj['pattern']`` and, for every entry that contains a date
        (matched by ``list_obj['basetime']['pattern']``) and a link, pushes a
        JSON job onto the ``target`` Redis list unless ``self.unique_url``
        rejects the URL.

        Args:
            list_obj: Source configuration. Reads ``pattern`` (``type`` plus
                the optional ``class``, ``selector``, ``title_key`` and
                ``length`` keys), ``basetime['pattern']`` and ``event_type``.
        """
        logging.info('-- analysis event --')
        self.content = BeautifulSoup(self.content, 'html.parser')
        pattern = list_obj['pattern']
        pattern_type = pattern['type']

        # Start from an empty selection so that an unsupported configuration
        # is a logged no-op instead of an UnboundLocalError further down.
        list_dom = []
        if pattern_type == "list":
            if pattern.get('class'):
                list_dom = self.content.find_all('li', class_=pattern['class'])
            elif pattern.get('selector'):
                list_dom = self.content.select(pattern['selector'])
            else:
                logging.warning('-- list pattern needs class or selector --')
        elif pattern_type == "table":
            list_dom = self.content.find_all('tr')
        elif pattern_type == "h2":
            if pattern.get('class'):
                list_dom = self.content.find_all('h2', class_=pattern['class'])
            else:
                list_dom = self.content.find_all('h2')
        else:
            logging.warning('-- unsupported pattern type %s --', pattern_type)

        base_url = "%s://%s" % (self.url_info.scheme, self.url_info.netloc)

        for item in list_dom:
            try:
                matched = re.search(list_obj['basetime']['pattern'],
                                    item.get_text())
                url_dom = item.find('a')
                if matched is None or url_dom is None:
                    # No date or no link: this entry cannot become a job.
                    continue
                base_time = matched.group()

                try:
                    link = url_dom[pattern['title_key']]
                except (KeyError, TypeError):
                    # No usable title_key configured/present: default href.
                    link = url_dom['href']

                # Optional "start:end" slice applied to the raw link value.
                try:
                    start, end = pattern['length'].split(":")[:2]
                    lower = int(start) if start else None
                    upper = int(end) if end else None
                    link = link[lower:upper]
                except (KeyError, AttributeError, ValueError, TypeError):
                    # Missing or malformed spec: keep the link unchanged.
                    pass

                url = base_url + link
                job = {
                    "class": self.lclass,
                    "type": self.ltype,
                    "url": url,
                    "event_type": list_obj['event_type'],
                    "basetime": base_time + " 00:00:00"
                }
                if self.unique_url(str(url)):
                    redis_c.lpush('target', json.dumps(job))
                    logging.info('-- push url %s --', url)
                else:
                    # An already-known URL is expected, not an error.
                    logging.info('-- exist url %s --', url)
            except Exception:
                # Keep processing the remaining entries, but leave a trace
                # instead of discarding the failure silently.
                logging.exception('-- skip event entry --')
示例#3
0
 async def analysis_update(self, obj):
     """Fill ``self.result`` from an update detail page and push it to Redis.

     Every ``(field, rule)`` pair of *obj* is resolved with
     ``self.get_value(rule)`` and stored under ``field``; the finished
     record is tagged with class ``"update"`` and JSON-encoded onto the
     ``result`` list.

     Args:
         obj: Mapping of result field name to extraction rule.

     Raises:
         Exception: ``'config error'`` when a field name or rule is falsy.
     """
     logging.info('-- START ANALYSIS UPDATE DETAIL PAGE --')
     for field, rule in obj.items():
         if not (field and rule):
             raise Exception('config error')
         self.result[field] = self.get_value(rule)
     self.result['class'] = "update"
     logging.info('-- FINISH ANALYSIS DETAIL PAGE --')
     logging.info(self.result)
     payload = json.dumps(self.result)
     redis_c.lpush('result', payload)
示例#4
0
 async def analysis(self, obj):
     """Fill ``self.result`` from an event detail page and push it to Redis.

     Every ``(field, rule)`` pair of *obj* is resolved with
     ``self.get_value(rule)`` and stored under ``field``; the finished
     record is tagged with class ``"event"`` and JSON-encoded onto the
     ``result`` list. Failures are logged with their traceback and are not
     propagated to the caller.

     Args:
         obj: Mapping of result field name to extraction rule.
     """
     logging.info('-- START ANALYSIS DETAIL PAGE --')
     try:
         for k, v in obj.items():
             if not k or not v:
                 logging.error('-- ERROR %s %s --', k, v)
                 raise Exception('config error')
             self.result[k] = self.get_value(v)
         self.result['class'] = "event"
         logging.info('-- FINISH ANALYSIS DETAIL PAGE --')
         logging.info(self.result)
         redis_c.lpush('result', json.dumps(self.result))
     except Exception as e:
         # Log at ERROR level with the traceback; the original INFO-level
         # message made real failures indistinguishable from progress logs.
         logging.exception('-- ERROR %s --', e)
示例#5
0
文件: test.py 项目: betta-cyber/sharp
# -*- coding: utf-8 -*-

import json
from utils import redis_c, load_yaml

# Manual smoke test: push one crawl job onto the Redis "list" queue.
# Exactly one job dict is active at a time; the commented-out dicts below are
# alternative jobs that can be swapped in by hand.
# NOTE(review): a worker consuming the "list" queue is assumed to exist
# elsewhere - it is not visible from this script.
# a = {"type": "tc260", "url": "https://www.tc260.org.cn/front/postDetail.html?id=20200527151336"}
a = {"type": "cert", "class": "event"}
# a = {"type": "cnvd", "class": "vul"}
# a = {"type": "cnnvd", "class": "vul"}
# a = {"type": "freebuf", "class": "intelligence"}
# b = {"type": "xz", "class": "intelligence"}
# c = {"type": "seebug", "class": "intelligence"}

# a = {"type": "snyk", "class": "vul"}

# Jobs that target a single detail page directly.
# NOTE(review): "法文法规" is the event_type label of these jobs; presumably
# it means "laws and regulations" - confirm before relying on it.
# a = {"type": "djbh", "url": "http://www.djbh.net/webdev/web/HomeWebAction.do?p=getXxgg&id=8a8182566ed3d102016fa6d2737f0034", "event_type": "法文法规"}
# a = {"type": "tc260", "url": "https://www.tc260.org.cn/front/postDetail.html?id=20200527151336", "event_type": "法文法规"}

# Enqueue the active job as JSON.
redis_c.lpush("list", json.dumps(a))

# Variants for pushing several of the jobs above in one run.
# redis_c.lpush("list", json.dumps(a))
# redis_c.lpush("list", json.dumps(b))
# redis_c.lpush("list", json.dumps(c))
示例#6
0
    def analysis_html(self, list_obj):
        """Extract list entries from an HTML page and push them to Redis.

        What happens depends on ``self.lclass``:

        * ``"intelligence"`` - every entry becomes a finished record on the
          ``result`` list. Any error in this branch is logged, not raised.
        * ``"vul"`` / ``"update"`` - every entry becomes a job carrying its
          URL on the ``target`` list.

        Entries whose URL ``self.unique_url`` rejects are only logged.

        Args:
            list_obj: Source configuration; reads ``pattern`` (how to locate
                the entries) and ``response`` (per-field extraction rules
                handed to ``self.get_value``).

        Raises:
            IndexError, AttributeError: in the ``"vul"`` branch for a
                ``table`` pattern when the page has no ``<table>`` or its
                first table has no ``<tbody>``.
        """
        logging.info('-- analysis html --')
        # Convert to BeautifulSoup first if the content is still raw markup.
        if not isinstance(self.content, BeautifulSoup):
            self.content = BeautifulSoup(self.content, 'html.parser')

        if self.lclass == "intelligence":
            try:
                list_dom = self._select_list_dom(list_obj['pattern'])
                self.current_obj = list_obj
                for i in list_dom:
                    # url is the most important thing
                    u = {
                        "class":
                        self.lclass,
                        "title":
                        self.get_value(i,
                                       list_obj['response']['title']).strip(),
                        "summary":
                        self.get_value(i, list_obj['response']['summary']),
                        "publish_time":
                        self.get_value(i,
                                       list_obj['response']['publish_time']),
                        "source":
                        self.get_value(i, list_obj['response']['source']),
                        "raw_url":
                        self.get_value(i, list_obj['response']['raw_url'])
                    }
                    url = u['raw_url']
                    uhash = str(md5(url))
                    if self.unique_url(url):
                        u["rhash"] = uhash
                        redis_c.lpush('result', json.dumps(u))
                        logging.info(u)
                        logging.info('-- push url %s --', url)
                    else:
                        logging.info('-- exist url %s --', url)
            except Exception as e:
                # Best-effort branch: report the failure with its traceback
                # but do not propagate it.
                logging.exception('-- error %s --', e)
        elif self.lclass == "vul":
            list_dom = self._select_list_dom(list_obj['pattern'],
                                             rows_from_first_table=True)
            self.current_obj = list_obj
            for i in list_dom:
                # url is the most important thing
                u = {
                    "class":
                    self.lclass,
                    "type":
                    self.ltype,
                    "source":
                    self.get_value(i, list_obj['response']['source']),
                    "title":
                    self.get_value(i, list_obj['response']['title']).strip(),
                    "url":
                    self.get_value(i, list_obj['response']['url']),
                }
                url = u['url']
                uhash = str(md5(url))
                if self.unique_url(url):
                    u["rhash"] = uhash
                    redis_c.lpush('target', json.dumps(u))
                    logging.info('-- push url %s --', url)
                else:
                    # Same trace as the other branches for skipped URLs.
                    logging.info('-- exist url %s --', url)
        elif self.lclass == "update":
            logging.info('-- html update analysis --')
            if list_obj['pattern']['type'] == "h2":
                if list_obj['pattern'].get('class'):
                    lists = self.content.find_all(
                        'h2', class_=list_obj['pattern']['class'])
                else:
                    lists = self.content.find_all('h2')
            else:
                # Every non-h2 update source is located by CSS selector.
                lists = self.content.select(list_obj['pattern']['selector'])

            self.current_obj = list_obj
            for i in lists:
                u = {
                    "class":
                    self.lclass,
                    "type":
                    self.ltype,
                    "source":
                    self.get_value(i, list_obj['response']['source']),
                    "url":
                    self.get_value(i, list_obj['response']['url']),
                    "title":
                    self.get_value(i, list_obj['response']['title']).strip(),
                }
                url = u['url']
                uhash = str(md5(url))
                if self.unique_url(url):
                    u["rhash"] = uhash
                    redis_c.lpush('target', json.dumps(u))
                    logging.info('-- push url %s --', url)
                else:
                    logging.info('-- exist url %s --', url)

    def _select_list_dom(self, pattern, rows_from_first_table=False):
        """Return the nodes of ``self.content`` selected by *pattern*.

        Args:
            pattern: The ``pattern`` section of a source configuration
                (``type`` plus optional ``class`` / ``selector``).
            rows_from_first_table: For ``table`` patterns, take the rows of
                the first table's ``<tbody>`` instead of every ``<tr>`` on
                the page.

        Returns:
            The matching nodes, or an empty list (after a warning) when the
            pattern type is unknown or a ``list`` pattern has neither a
            ``class`` nor a ``selector``. Previously that case left the
            selection variable unbound.
        """
        kind = pattern['type']
        if kind == "list":
            if pattern.get('class'):
                return self.content.find_all('div', class_=pattern['class'])
            if pattern.get('selector'):
                return self.content.select(pattern['selector'])
        elif kind == "table":
            if not rows_from_first_table:
                return self.content.find_all('tr')
            table = self.content.find_all('table')
            return table[0].tbody.find_all('tr')
        elif kind == "h2":
            if pattern.get('class'):
                return self.content.find_all('h2', class_=pattern['class'])
            return self.content.find_all('h2')
        logging.warning('-- no list dom for pattern type %s --', kind)
        return []
示例#7
0
    def analysis_json(self, list_obj):
        """Turn an already decoded JSON listing into Redis queue entries.

        Depending on ``self.lclass``:

        * ``"intelligence"`` - items under
          ``self.content[list_obj['pattern']['selector']]`` become records on
          the ``result`` list (with an ``rhash`` derived from the URL).
        * ``"event"`` - items under ``self.content['list']`` become jobs on
          the ``target`` list.
        * ``"update"`` - items of ``self.content`` become component-update
          records on the ``result`` list (with a ``source_hash``).

        Items whose URL ``self.unique_url`` rejects are only logged.

        Args:
            list_obj: Source configuration; reads ``pattern``, ``response``,
                ``event_type`` and ``basetime`` as needed by the branch.
        """
        logging.info('-- analysis json --')

        if self.lclass == "intelligence":
            for entry in self.content[list_obj['pattern']['selector']]:
                rules = list_obj['response']
                record = {"class": self.lclass}
                record["raw_url"] = self.get_value(entry, rules['raw_url'])
                record["title"] = self.get_value(entry,
                                                 rules['title']).strip()
                for name in ("summary", "publish_time", "source"):
                    record[name] = self.get_value(entry, rules[name])

                url = record['raw_url']
                digest = str(md5(url))
                if not self.unique_url(url):
                    logging.error('-- exist url %s --' % url)
                    continue
                record["rhash"] = digest
                redis_c.lpush('result', json.dumps(record))
                logging.info('-- push url %s --' % url)

        elif self.lclass == "event":
            for entry in self.content['list']:
                url = entry[list_obj['pattern']['key']]
                if not self.unique_url(url):
                    logging.error('-- exist url %s --' % url)
                    continue
                job = {
                    "class": self.lclass,
                    "type": self.ltype,
                    "url": url,
                    "event_type": list_obj['event_type'],
                    # The last two characters of the raw value are dropped.
                    "basetime": entry[list_obj['basetime']['key']][:-2],
                }
                redis_c.lpush('target', json.dumps(job))
                logging.info('-- push url %s --' % url)

        elif self.lclass == "update":
            # Fields whose record key equals the name of their rule.
            same_named = (
                "component", "commit_time", "description", "source",
                "update_type", "cve_id", "version", "level",
                "source_platform", "commit_user", "update_title",
            )
            for entry in self.content:
                # Data structure of a component-update intelligence record.
                rules = list_obj['response']
                record = {"class": self.lclass}
                record["raw_url"] = self.get_value(entry, rules['url'])
                for name in same_named:
                    record[name] = self.get_value(entry, rules[name])

                url = record['raw_url']
                if not self.unique_url(url):
                    logging.error('-- exist url %s --' % url)
                    continue
                record["source_hash"] = str(md5(url))
                redis_c.lpush('result', json.dumps(record))
                logging.info('-- push url %s --' % url)
示例#8
0
def event_clawer():
    """Push one "event" crawl job per enabled source onto the "list" queue."""
    # 'cac', 'tc260' and 'djbh' are further known sources, currently disabled.
    sources = ('miit', 'cert')
    for source in sources:
        job = {'type': source, 'class': 'event'}
        redis_c.lpush("list", json.dumps(job))
示例#9
0
def update_clawer():
    """Push one "update" crawl job per source onto the "list" queue."""
    # The message used to read "vul start" (copied from vul_clawer), which
    # made the two crawlers indistinguishable in the output.
    print("update start")
    for i in ['github', 'postgresql', 'tsrc']:
        data = {'type': i, 'class': 'update'}
        redis_c.lpush("list", json.dumps(data))
示例#10
0
def vul_clawer():
    """Push one "vul" crawl job per vulnerability source onto "list"."""
    print("vul start")
    jobs = [
        json.dumps({'type': name, 'class': 'vul'})
        for name in ('cnvd', 'cnnvd')
    ]
    for job in jobs:
        redis_c.lpush("list", job)
示例#11
0
def intelligence_clawer():
    """Push one "intelligence" crawl job per feed onto the "list" queue."""
    print("ti start")
    feeds = ('anquanke', 'xz', 'doonsec', 'cnvd', 'seebug', 'freebuf')
    for feed in feeds:
        payload = json.dumps({'type': feed, 'class': 'intelligence'})
        redis_c.lpush("list", payload)