def __init__(self):
        """Set the daemon I/O attributes and open (or create) the scraper table.

        Raises:
            Exception: if the table already exists but ``get_info()`` does
                not equal ``ToiScraper.TABLE_SCHEMA``.
        """
        # Attributes the original author marks as "required vars"
        # (presumably read by the daemon runner hosting this object --
        # confirm against the caller).
        self.stdin_path = self.stdout_path = self.stderr_path = '/dev/null'
        self.pidfile_path = PID_FILE_PATH
        self.pidfile_timeout = 5

        # Open the database and look the scraper table up by name.
        self.db_name = DB_PATH
        self.db = SQLite(self.db_name)
        table_name = ToiScraper.TABLE_NAME
        self.table = self.db.get(table_name)
        print("Initializing...")

        if self.table:
            # An existing table must carry exactly the expected schema.
            schema_matches = self.table.get_info() == ToiScraper.TABLE_SCHEMA
            if not schema_matches:
                message = "Table {0} exists but with incorrect schema".format(
                    table_name)
                print(message)
                raise Exception(message)
        else:
            print("No table found with name {0}. Creating it.".format(
                table_name))
            self.table = self.db.create(table_name, ToiScraper.TABLE_SCHEMA)

        self.iter_date = self._get_init_date_full()
def get_top(table='secwiki', column='domain', time=2020, top=10):
    """
    Fetch the most frequent values of a column as shares for a pie chart.

    Rows of ``table`` whose ``ts`` contains ``time`` are grouped by
    ``column`` and ordered by descending ``count(url)``.

    :param table: table to query
    :param column: column to group by
    :param time: value matched inside ``ts`` with ``like '%time%'``
    :param top: maximum number of leading groups to report individually
    :return: dict mapping each of the first ``top`` group values to its
        share of the total count (rounded to 4 places), plus an ``'other'``
        key holding ``1 - sum(rounded shares)``. A group value literally
        equal to ``'other'`` is overwritten by that remainder.
    """
    so = SQLite("data/secwiki.db")
    # NOTE(review): table, column and time are interpolated straight into
    # the SQL text, so only trusted values may be passed in.
    sql = "select {column},count(url) as ct from {table} \
		  where ts like '%{time}%' \
		  group by {column} \
		  order by ct DESC".format(column=column, table=table, time=time)
    rows = so.query(sql)

    counts = OrderedDict()
    for row in rows:
        counts[row[0]] = row[1]

    # The grand total is loop-invariant: compute it once instead of
    # re-summing every count on each iteration.
    total = sum(counts.values())

    od_pec = dict()
    for rank, (key, count) in enumerate(counts.items()):
        if rank >= top:
            break
        od_pec[key] = round(count / total, 4)
    # 'other' absorbs everything not listed individually.
    od_pec['other'] = round(1 - sum(od_pec.values()), 4)
    return od_pec
def parse_all(fnames, reparse=False):
    """
    Parse downloaded secwiki HTML pages into txt files and the sqlite table.

    Each parsed record is written as one tab-separated line to
    ``data/txt/secwiki_<n>.txt`` and inserted into the ``secwiki`` table
    with the columns ts, tag, url, title, root_domain, domain, url_path.

    :param fnames: iterable of HTML file names to parse; ignored when
        ``reparse`` is true
    :param reparse: when true, delete every row of ``secwiki`` and parse
        all files matching ``data/html/secwiki_*.html`` again, overwriting
        any existing txt output
    :return: None
    """
    sqldb = SQLite('data/secwiki.db')

    # A full reparse starts from every local HTML file and an empty table.
    if reparse:
        fnames = list(glob.iglob(r'data/html/secwiki_*.html'))
        sqldb.execute('delete from `secwiki`')

    if not fnames:
        print('No new secwiki')
        return

    sql = 'insert into `secwiki` (`ts`,`tag`,`url`,`title`,`root_domain`,`domain`,`url_path`) values(?,?,?,?,?,?,?);'

    # Make sure the output directory exists once, before the loop.
    os.makedirs(path("data/txt"), exist_ok=True)

    for fname in fnames:
        m = re.search(r'secwiki_(\d+)\.html', fname)
        if m is None:
            # Without the numeric suffix the output name cannot be derived.
            print('Skip unexpected file name: {0}'.format(fname))
            continue
        rname = path('data/txt', 'secwiki_' + m.group(1) + '.txt')

        # Skip files already parsed to a non-empty txt file. On a full
        # reparse the table was just emptied, so nothing may be skipped.
        if (not reparse and os.path.exists(rname)
                and os.path.getsize(rname) > 0):
            continue

        # Records keyed by content[0] + content[2] (presumably ts + url,
        # going by the insert column order) so duplicates collapse.
        all_content = {}

        # Both handles are closed even if parsing raises.
        with codecs.open(rname, mode='wb') as rf, \
                codecs.open(fname, 'rb') as f:
            for content in parse_single(f):
                if content:
                    all_content[content[0] + content[2]] = content
                    rf.write("\t".join(content).encode() + b'\r\n')

        # Bulk insert into sqlite.
        if all_content:
            sqldb.executemany(sql, all_content.values())
示例#4
0
def load():
    """
    Load the raw nvd.nist CVE data labelled by exploit tag.

    Rows of ``nvd_cve`` whose reference tags do not contain "Exploit" get
    label 0, rows whose tags contain it get label 1. The combined frame is
    printed (head only), written to ``cve2.csv`` and returned.
    """
    so = SQLite('data/nvd.db')

    # Both queries share everything except the final predicate.
    select_prefix = ('select CVE_Items_cve_CVE_data_meta_ID,'
                     'CVE_Items_cve_description_description_data_value '
                     'from nvd_cve '
                     'where CVE_Items_cve_references_reference_data_tags ')
    labelled_predicates = (
        (0, 'not like "%Exploit%"'),  # samples without an exploit tag
        (1, 'like "%Exploit%"'),      # samples with an exploit tag
    )

    frames = []
    for label, predicate in labelled_predicates:
        frame = sql2cve(so, select_prefix + predicate)
        frame['label'] = label
        frames.append(frame)

    cve = pd.concat(frames)
    print(cve.head())
    cve.to_csv('cve2.csv', index=False)
    return cve
示例#5
0
        ts = float(ts)
    ts_str = datetime.datetime.fromtimestamp(ts).strftime(
        '%Y-%m-%d %H:%M:%S.%f')
    return ts_str


def get_md5(path):
    """Return the hexadecimal MD5 digest of the file at *path*.

    The file is opened in binary mode inside a ``with`` block so the handle
    is always closed, and it is hashed in fixed-size chunks so large files
    are not loaded into memory at once.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    digest = hashlib.md5()
    with open(path, 'rb') as fh:
        # iter() with a sentinel keeps reading until read() returns b''.
        for chunk in iter(lambda: fh.read(65536), b''):
            digest.update(chunk)
    return digest.hexdigest()


def get_sha1(path):
    """Return the hexadecimal SHA-1 digest of the file at *path*.

    The file is opened in binary mode inside a ``with`` block so the handle
    is always closed, and it is hashed in fixed-size chunks so large files
    are not loaded into memory at once.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    digest = hashlib.sha1()
    with open(path, 'rb') as fh:
        # iter() with a sentinel keeps reading until read() returns b''.
        for chunk in iter(lambda: fh.read(65536), b''):
            digest.update(chunk)
    return digest.hexdigest()


if __name__ == '__main__':
    sqlite = SQLite('data.db')
    # sqlite.insert()
    while True:
        input_dir = input('Enter folder path: ')
        if isinstance(input_dir, str):
            work_dir = input_dir
            break
        else:
            pass
    # work_dir = 'D:\\共享区'
    for parent, dirnames, filenames in os.walk(work_dir, followlinks=True):
        for filename in filenames:
            file_path = os.path.join(parent, filename)
            file_attr = stat(file_path)
            attr_list = [
                file_attr.st_mode, file_attr.st_uid, file_attr.st_gid,