Пример #1
0
def get_predict_keyword(keyword):
    """Collect neighbor keywords for every known headword of ``keyword``.

    Each word yielded by ``get_headword(keyword)`` that is present in the
    ``'all'`` section of the keywords dictionary contributes the entries
    returned by ``tokens.network.keyword_neighbors`` for that word.

    Returns:
        The combined neighbor entries, sorted by their second element in
        descending order.
    """
    # Load the dictionary once: it is the same for every headword, so there
    # is no reason to reload it on each loop iteration.
    known_keywords = iotools.load_keywords_dict()['all']
    ret = []
    for word in get_headword(keyword):
        # Single-argument call form prints identically on Python 2 and 3.
        print(word)
        if word in known_keywords:
            ret += tokens.network.keyword_neighbors(word)
    return sorted(ret, key=lambda x: x[1], reverse=True)
Пример #2
0
def get_all_keywords():
    """Render the dataset-keywords page.

    The ``'all'`` section of the keywords dictionary is reordered by the
    number of entries per keyword (fewest first), and every keyword's
    entries are reduced to a de-duplicated list of their first elements
    (the datasets). A dataset-name -> position mapping built from
    ``iotools.load_datasets()`` is passed to the template as well.
    """
    def _entry_count(pair):
        return len(pair[1])

    keywords = iotools.load_keywords_dict()

    ordered = OrderedDict(sorted(keywords['all'].items(), key=_entry_count))
    for name, entries in ordered.items():
        # Keep only the dataset part of each (dataset, <other>) pair, once.
        ordered[name] = list({dataset for dataset, _ in entries})
    keywords['all'] = ordered

    dataset_ids = {
        dataset['name']: position
        for position, dataset in enumerate(iotools.load_datasets())
    }
    return render_template('dataset-keywords.html', keywords=keywords, dataset_ids=dataset_ids)
Пример #3
0
def keyword_neighbors(keyword):
    """Rank the keywords that share datasets with ``keyword``.

    For every dataset listed for ``keyword``, each keyword of that dataset
    gets one count. The count is then turned into a weight using
    ``weighting_function`` and ``mylog``.

    Returns:
        A list of ``(keyword, weight)`` tuples, highest weight first.
    """
    keywords = iotools.load_keywords_dict()
    keyword_weight = weighting_function(keyword)
    related_datasets = set(entry[0] for entry in keywords['all'][keyword])

    # How many of the related datasets mention each keyword.
    shared_counts = {}
    for dataset in related_datasets:
        for other in iotools.load_dataset_keywords_dict(dataset)['all']:
            if other in shared_counts:
                shared_counts[other] += 1
            else:
                shared_counts[other] = 1

    # The divisor is evaluated per item on purpose: it must not be computed
    # at all when there is nothing to score.
    scored = [
        (
            other,
            1.0 * weighting_function(other) * mylog(count)
            / keyword_weight / mylog(len(related_datasets)),
        )
        for other, count in shared_counts.items()
    ]
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored
Пример #4
0
def discovery():
    """Render the keyword-search page.

    The ``keyword`` parameter is taken from the submitted form if present,
    otherwise from the query string, otherwise it is ``None``. When a
    non-empty keyword is given, the ``process_keyword`` lookups are run
    and their results are passed to the template under fixed labels.
    """
    keywords = list(iotools.load_keywords_dict()['all'].keys())

    # Form data takes precedence; the query string is only consulted when
    # the form does not carry the parameter.
    keyword = None
    for source_name in ('form', 'args'):
        params = getattr(request, source_name)
        if 'keyword' in params:
            keyword = params.get('keyword', None)
            break

    result = {}
    if keyword:
        lookups = (
            ("stopword", process_keyword.get_stopword),
            ("headword", process_keyword.get_headword),
            ("sans", process_keyword.get_sans),
            ("sans_head", process_keyword.get_sans_head),
            ("symantec", process_keyword.get_symantec),
            ("symantec_head", process_keyword.get_symantec_head),
            ("predict", process_keyword.get_predict_keyword),
            ("capec", process_keyword.get_capec),
            ("capec_head", process_keyword.get_capec_head),
        )
        for label, lookup in lookups:
            result[label] = lookup(keyword)

    return render_template("keyword-search.html", suggestions=keywords, keyword=keyword, result=result)
Пример #5
0
def tag_cloud_text_new_keywords_weighted():
    """Build the word list for a weighted tag cloud.

    Every keyword in the ``'all'`` section is normalized and repeated
    ``int(weighting_function(keyword) * 1000)`` times.

    Returns:
        A flat list of normalized keywords, one entry per repetition.
    """
    words = []
    for keyword, _ in iotools.load_keywords_dict()['all'].items():
        repetitions = int(weighting_function(keyword) * 1000)
        words.extend([normalized(keyword)] * repetitions)
    return words
Пример #6
0
def weight_all_keywords():
    """Return ``weight_keywords`` applied to the ``'all'`` keyword section."""
    all_keywords = iotools.load_keywords_dict()['all']
    return weight_keywords(all_keywords)
Пример #7
0
#! /usr/bin/env python -u
# coding=utf-8
from math import log
from utils import iotools

__author__ = 'xl'


# Keywords dictionary loaded once at import time and read by weighting_function.
keywords = iotools.load_keywords_dict()


def weighting_function(keyword):
    """Return the weight of ``keyword``.

    The weight is the reciprocal of ``log2(n + 1)``, where ``n`` is the
    number of entries stored for the keyword in the ``'all'`` section of
    the module-level ``keywords`` dictionary. If the logarithm is not
    positive, the weight is 0.
    """
    entry_count = len(keywords['all'][keyword])
    keyword_freq = log(entry_count + 1, 2)
    if keyword_freq > 0:
        return 1 / keyword_freq
    return 0


def weight_keywords(keyword_list):
    """Pair each keyword with its weight, lowest weight first.

    Args:
        keyword_list: iterable of keywords accepted by ``weighting_function``.

    Returns:
        A list of ``(keyword, weight)`` tuples sorted by ascending weight.
    """
    weighted = [(keyword, weighting_function(keyword)) for keyword in keyword_list]
    weighted.sort(key=lambda pair: pair[1])
    return weighted


def dataset_weighting_function(dataset):
    """Return the sum of the weights of all keywords of ``dataset``.

    Args:
        dataset: mapping with a ``'name'`` key identifying the dataset.
    """
    dataset_keywords = iotools.load_dataset_keywords_dict(dataset['name'])['all']
    total = 0
    for _, weight in weight_keywords(dataset_keywords):
        total += weight
    return total


def weight_all_datasets():
Пример #8
0
def all_keywords_neighbors():
    """Compute the neighbors of every keyword in the ``'all'`` section.

    Returns:
        A list of ``(keyword, neighbors)`` tuples, ordered so that keywords
        with the most neighbors come first.
    """
    neighbors_by_keyword = {
        keyword: keyword_neighbors(keyword)
        for keyword in iotools.load_keywords_dict()['all']
    }

    ranking = list(neighbors_by_keyword.items())
    ranking.sort(key=lambda entry: len(entry[1]), reverse=True)
    return ranking