Пример #1
0
def start_server():
    """Install the SIGINT handler, optionally start the backed-off search
    worker, and run the server.

    When the ``CTI.knowledge_base.on_miss_backoff`` config flag is truthy, a
    separate process running ``backed_off_search`` is started before the
    server. After ``server.run`` returns, that process (if any) is joined.
    """

    signal.signal(signal.SIGINT, signal_exit_handler)

    backed_off_search_process = None

    if Config.get('CTI')['knowledge_base']['on_miss_backoff']:
        backed_off_search_process = Process(target=backed_off_search)
        backed_off_search_process.start()

    config = Config.get('server')

    # NOTE(review): presumably blocks until the server stops -- confirm.
    server.run(host=config['host'], port=config['port'], threaded=True)

    # Join based on whether a worker was actually started rather than on a
    # second config read: if the flag read differently the second time we
    # would either call .join() on None or skip joining a running worker.
    if backed_off_search_process is not None:
        backed_off_search_process.join()
Пример #2
0
    def __get_contexts(self, titles):
        """Return the contexts available for ``titles``.

        The contexts cache is queried first; every title whose cached value
        is ``None`` counts as a miss. What happens to the misses depends on
        ``on_miss_backoff``:

        * ``True``: each missed title is written (title -> title) into the
          SQLiteDict storage named by ``CTI.backed_off_search.storage_name``
          and is not resolved in this call.
        * otherwise: the misses are looked up through the search engine, the
          results are appended to the returned list and saved to the cache.
        """

        found = []
        missing = []

        cached = self.__contexts_cache.get(titles)

        for title, context in cached.items():
            if context is None:
                missing.append(title)
                continue
            found.append(context)

        if self.__on_miss_backoff is not True:
            # Resolve the misses right away and remember them in the cache.
            fetched = self.__search_engine.contexts(missing)
            found.extend(list(fetched.values()))
            self.__contexts_cache.set_many(fetched)
            return found

        # Backoff mode: queue the misses in the backed-off search storage.
        for title in missing:
            storage = SQLiteDict.storage(
                Config.get('CTI')['backed_off_search']['storage_name'])
            storage[title] = title

        return found
Пример #3
0
    def __init__(self, cache_name):
        """Remember ``cache_name`` and create its Elasticsearch index.

        If ``ES.connection('es')`` returns ``None`` an error is logged and
        no index is created. Otherwise the index is created with the settings
        found under ``elasticsearch.indices_settings[cache_name]``.
        """

        self.__cache_name = cache_name

        if ES.connection('es') is not None:
            index_settings = Config.get(
                'elasticsearch')['indices_settings'][self.__cache_name]
            ESService.create_index(self.__cache_name, index_settings)
        else:
            Logger.log(
                __name__, 'could not connect to elasticsearch', type='error')
Пример #4
0
def clear_backedoff_storage():
    """Delete the backed-off search storage file, if it exists.

    The path is resolved via ``Data.get`` from the configured
    ``CTI.backed_off_search.storage_name``. A message is logged in both
    cases (file removed / nothing to remove).
    """

    storage_name = Config.get('CTI')['backed_off_search']['storage_name']
    storage_path = Data.get(storage_name)

    if not os.path.isfile(storage_path):
        Logger.log(__name__, 'the backedoff search storage is already empty')
        return

    os.remove(storage_path)
    Logger.log(__name__, 'the backedoff search storage is now empty')
Пример #5
0
def handle_cti_request(terms, context):
    """Score every term in ``terms`` against ``context``.

    Returns a dict mapping each term to its score; an empty ``terms`` yields
    an empty dict. When ``CTI.max_processes_per_job`` is 1 or there is only
    one term, the scores are computed in-process with
    ``cti.term_informativeness`` and rounded to 2 decimals. Otherwise the
    work is mapped over a process pool through ``cti_job``, whose results
    are consumed as ``(term, score)`` pairs.
    """

    if len(terms) == 0:
        return {}

    run_inline = (
        Config.get('CTI')['max_processes_per_job'] == 1 or len(terms) == 1)

    if run_inline:
        return {
            term: round(cti.term_informativeness(term, context), 2)
            for term in terms
        }

    job = partial(cti_job, cti=cti, context=context)

    with Pool(Config.get('CTI')['max_processes_per_job']) as pool:
        return dict(pool.map(job, terms))
Пример #6
0
    def __init__(self):
        """Build the knowledge base from the ``CTI.knowledge_base`` config.

        Stores the config section and its ``on_miss_backoff`` flag, then
        creates the search engine (limited to ``max_contexts_per_query``
        contexts per query), the query cache and the contexts cache.
        """

        kb_config = Config.get('CTI')['knowledge_base']

        self.__config = kb_config
        self.__on_miss_backoff = kb_config['on_miss_backoff']

        max_contexts = kb_config['max_contexts_per_query']
        self.__search_engine = SearchEngine(
            top_n_contexts_per_query=max_contexts)

        self.__query_cache = QueryCache.create(kb_config['query_cache'])
        self.__contexts_cache = ContextsCache.create(
            kb_config['contexts_cache'])
Пример #7
0
def bootstrap_knowledge_base(dump_directory):
    """Fill the contexts cache from the articles in ``dump_directory``.

    Every article yielded by ``WikipediaUtils.get_articles`` (requested with
    the ``title`` and ``context`` attributes) is stored in the contexts cache
    under its title, and a log line is written per article.
    """

    Logger.log(__name__, 'bootstrapping knowledge base...')

    cache_settings = Config.get('CTI')['knowledge_base']['contexts_cache']
    cache = ContextsCache.create(cache_settings)

    articles = WikipediaUtils.get_articles(
        dump_directory, attributes=['title', 'context'])

    for article in articles:
        title = article['title']
        cache.set(title, article['context'])
        Logger.log(__name__, 'bootstrapping got context for: ' + title)
Пример #8
0
def backed_off_search():
    """Worker loop that resolves queued titles through the search engine.

    Runs forever: pops one title at a time from the SQLiteDict storage named
    by ``CTI.backed_off_search.storage_name``, fetches its context from the
    search engine and stores it in the contexts cache. When the storage is
    empty (``popitem`` raises ``KeyError``) it sleeps for
    ``empty_storage_wait_seconds`` and retries. Between successful searches
    it sleeps for ``seconds_between_searches``. The storage size is logged
    at start-up and again after every 31 processed titles.
    """

    config = Config.get('CTI')['backed_off_search']

    contexts_cache = ContextsCache.create(
        Config.get('CTI')['knowledge_base']['contexts_cache'])
    search_engine = SearchEngine()

    def log_storage_size():
        # Report how many titles are still waiting in the storage.
        Logger.log(
            __name__, 'backed off search storage has ' +
            str(len(SQLiteDict.storage(config['storage_name']))) + ' items')

    Logger.log(__name__, 'backed off search process started')
    log_storage_size()

    processed = 0

    while True:

        try:
            title = SQLiteDict.storage(config['storage_name']).popitem()[0]
        except KeyError:
            # Nothing queued right now; wait before polling again.
            sleep(config['empty_storage_wait_seconds'])
            continue

        context = search_engine.context(title)
        contexts_cache.set(title, context)

        Logger.log(__name__, 'backed off search got context for: ' + title)

        # Wraps to 0 on the 31st title, which triggers the size report.
        processed = (processed + 1) % 31
        if processed == 0:
            log_storage_size()

        sleep(config['seconds_between_searches'])
Пример #9
0
 def __setup(self):
     """Reset the connection table and load the connection configs.

     ``__conns`` starts out as an empty dict; ``__configs`` is taken from
     the ``connections`` entry of the ``elasticsearch`` config section.
     """
     self.__conns = {}
     es_section = Config.get('elasticsearch')
     self.__configs = es_section['connections']
Пример #10
0
def clear_knowledge_base():
    """Delete the query-cache and contexts-cache indices, then log it.

    The index names are read from ``CTI.knowledge_base.<cache>.name`` and
    removed through ``ESService.delete_index``, query cache first.
    """
    for cache_key in ('query_cache', 'contexts_cache'):
        index_name = Config.get('CTI')['knowledge_base'][cache_key]['name']
        ESService.delete_index(index_name)

    Logger.log(__name__, 'the knowledge base is now empty')