class Container():
    """Dictionary-backed store of significance scorers addressed by hash code."""

    # NOTE(review): eval() executes the raw config strings as Python
    # expressions, so the configuration file must be trusted.
    # Neither constant is referenced in the visible part of this class.
    _THRESHOLD_FOR_CLEANING = eval(
        parse_config.get('detection', 'threshold_for_cleaning'))
    _CAPACITY_FOR_CLEANING = eval(
        parse_config.get('detection', 'capacity_for_cleaning'))

    def __init__(self):
        # Maps a reduced hash code to the scorer object stored for it.
        self.container = {}

    def get(self, _id, _timestamp):
        """Look up (or create) the scorers associated with ``_id``.

        ``repr(_id)`` is hashed once per entry of ``hash_available`` and each
        hash is reduced modulo ``2**_BIT_COUNT``.

        Args:
            _id: Object identifying the item; only its ``repr`` is used.
            _timestamp: Accepted for interface compatibility; not used here.

        Returns:
            A ``(scorers, record)`` pair of parallel lists. For a code that
            was already stored, ``record`` holds the code itself; for a code
            whose scorer was created by this call it holds ``0``. Codes that
            are missing while ``_SIGNI_TYPE`` is not ``'s'`` contribute to
            neither list.
        """
        buckets = [
            get_hash(make_hasher(), repr(_id)) % 2**_BIT_COUNT
            for make_hasher in hash_available
        ]

        scorers = []
        hits = []
        for bucket in buckets:
            if bucket in self.container:
                hits.append(bucket)
            elif _SIGNI_TYPE == 's':
                # First sighting of this code: register a fresh scorer.
                self.container[bucket] = fast_signi.SignificanceScorer()
                hits.append(0)
            else:
                continue
            scorers.append(self.container[bucket])
        return scorers, hits
Пример #2
0
    def process(self, sig_instance, sig_list=None):
        """Attach a significance instance to a thread, or open a new thread.

        Args:
            sig_instance: 6-item sequence unpacked as
                ``(t, count, ewma, ewmvar, sig, keywords)``.
            sig_list: Optional iterable that is only printed when the
                ``output/debug_info`` config entry evaluates truthy.

        Returns:
            The instance's ``sig`` value when a new thread was created for
            it; ``0.`` when it falls outside ``[start_time, end_time]`` or was
            accepted by an existing thread.
        """
        # Local names are kept as-is: the eval() below can see this frame.
        _t, _count, _ewma, _ewmvar, _sig, _keywords = sig_instance

        if _t < self.start_time or _t > self.end_time:
            return 0.

        # NOTE(review): eval() runs the raw config string on every call.
        if eval(parse_config.get('output', 'debug_info')) and sig_list:
            print('-----------------------')
            for sig_ in sig_list:
                print(('__sig__', sig_))
            print('-----------------------')

        # The first thread that accepts the instance wins.
        for thread in self.threads:
            if thread.add_to_thread(sig_instance):
                return 0.

        # Nobody claimed it: start a new thread seeded with this instance.
        fresh = Slice()
        fresh.new_thread(sig_instance)
        self.threads.append(fresh)
        return _sig
Пример #3
0
    def __init__(self, _stream):
        """Wire the detector to ``_stream`` and load its settings from config.

        Reads the ``significance`` settings and passes them to
        ``fast_signi.SignificanceScorer.set_window_size``, creates the
        ``SigniProcessor``, and parses the ``detection`` start/end times
        (format ``YYYY-MM-DD HH:MM:SS``) into ``datetime`` objects.

        Args:
            _stream: Source object stored as ``self.stream``.
        """
        self.stream = _stream
        self.threads = []

        # NOTE(review): eval() executes the raw config strings; these local
        # names are left unchanged because later eval() calls can see them.
        _window_size = eval(parse_config.get('significance', 'window_size'))
        _cycle = eval(parse_config.get('significance', 'cycle'))
        _average = eval(parse_config.get('significance', 'average'))
        fast_signi.SignificanceScorer.set_window_size(
            _window_size, _cycle, _average)

        window_start = parse_config.get('detection', 'start_time')
        window_end = parse_config.get('detection', 'end_time')
        self.processor = signi_processor.SigniProcessor()

        stamp_format = '%Y-%m-%d %H:%M:%S'
        self.start_time = datetime.strptime(window_start, stamp_format)
        self.end_time = datetime.strptime(window_end, stamp_format)
Пример #4
0
    def __next__(self):
        """Pull one item from the stream and score it.

        Returns:
            ``stream.End_Of_Stream`` or ``None`` when the stream yields one of
            those values; otherwise a ``(ptweet, score)`` tuple, where
            ``score`` is the result of ``self.process`` or ``0.0`` when the
            processor produced no significance instance.
        """
        ptweet = next(self.stream)
        # Both sentinels are handed back to the caller untouched.
        if ptweet is stream.End_Of_Stream or ptweet is None:
            return ptweet

        sig_instance, sig_list = self.processor.process(ptweet)
        if sig_instance is None:
            return ptweet, 0.0

        output = self.process(sig_instance, sig_list)
        # NOTE(review): eval() runs the raw config string on every call.
        if eval(parse_config.get('output', 'debug_info')):
            print(sig_instance)
        return ptweet, output
Пример #5
0
 async def get_game_time(self, game_id: str, context: Any) -> GameTime:
     """Build a ``GameTime`` for ``game_id``.

     The time played is whatever ``config.get("minutes")`` returns and the
     last-played value is the fixed constant 1514768400 (presumably a Unix
     timestamp -- confirm against ``GameTime``). ``context`` is not used.
     """
     return GameTime(game_id, config.get("minutes"), 1514768400)
Пример #6
0
import stream
from parse_config import config as parse_config
import signi_processor
from datetime import datetime, timedelta
import fast_signi

# Value of the `thread_gap` entry in the [detection] config section.
# Not referenced in the visible part of this file -- presumably the largest
# allowed gap between instances of one thread; confirm against its users.
# NOTE(review): eval() executes the raw config string as a Python expression
# in this module's namespace, so the configuration file must be trusted.
_THREAD_GAP = eval(parse_config.get('detection', 'thread_gap'))


class Slice:
    """One thread of significance instances plus summary fields about it."""

    def __init__(self):
        """Create an empty slice; ``new_thread`` fills in the real values."""
        self.start = self.end = 0.0
        self.keywords = None
        self.sig = self.first_sig = 0.0
        self.thread = []
        self.first_keywords = None

    def new_thread(self, sig_instance):
        """Seed this slice with its first significance instance.

        Args:
            sig_instance: 6-item sequence unpacked as
                ``(t, count, ewma, ewmvar, sig, keywords)``; the first two
                entries of ``keywords`` are used.

        Sets ``start`` and ``end`` to ``t``, ``sig`` and ``first_sig`` to
        ``sig``, ``keywords`` to the set of the two keywords,
        ``first_keywords`` to them joined by a comma, and appends the
        instance to ``thread``.
        """
        timestamp, _, _, _, score, pair = sig_instance
        self.start = self.end = timestamp

        first_kw, second_kw = pair[0], pair[1]
        self.keywords = {first_kw, second_kw}
        self.thread.append(sig_instance)

        self.first_sig = self.sig = score
        self.first_keywords = first_kw + ',' + second_kw
Пример #7
0
from nltk.stem.lancaster import *
from nltk.stem.snowball import *
from nltk.stem.porter import *

from parse_config import config

# Module-wide stemmer instance, chosen once at import time from the
# `stemmer` entry of the [pre_process] config section. Recognised values are
# 'Snowball', 'Porter' and 'Lancaster'; any other value leaves `stemmer` as
# None, in which case stem() returns words unchanged.
stemmer = None
if config.get('pre_process', 'stemmer') == 'Snowball':
    # The Snowball stemmer is created for English.
    stemmer = SnowballStemmer("english")

if config.get('pre_process', 'stemmer') == 'Porter':
    stemmer = PorterStemmer()

if config.get('pre_process', 'stemmer') == 'Lancaster':
    stemmer = LancasterStemmer()


def stem(word):
    """Return the stem of ``word`` using the module-level ``stemmer``.

    Stemming is best-effort: when no stemmer is configured, or the stemmer
    raises an ordinary exception, the word is returned unchanged.

    Args:
        word: The token to stem.

    Returns:
        The stemmed word converted with ``str``, or ``word`` itself when
        stemming is unavailable or fails.
    """
    if stemmer is None:
        # Existing diagnostic output, kept so callers see the same stdout.
        print('none')
        return word

    try:
        return str(stemmer.stem(word))
    except Exception:
        # Only ordinary errors fall back to the raw word; KeyboardInterrupt
        # and SystemExit must propagate instead of being swallowed.
        return word
from parse_config import config as parse_config
import fast_signi
import stemmer
import hashlib

# NOTE(review): the eval() calls below execute raw config strings as Python
# expressions, so the configuration file must be trusted.

# Value of `detection_threshold`; not referenced in the visible part of this
# file -- presumably the minimum score that counts as a detection; confirm.
_SIGNI_THRESHOLD = eval(parse_config.get('detection', 'detection_threshold'))

# Scorer selector: Container.get only creates new scorers when this is 's'.
_SIGNI_TYPE = parse_config.get('detection', 'detection_signi_type')

# Hash values are reduced modulo 2**_BIT_COUNT in Container.get.
_BIT_COUNT = eval(parse_config.get('detection', 'bit_count'))

# Wider set of hashlib constructors, currently disabled:
# hash_available=[hashlib.md5, hashlib.sha1, hashlib.sha224, hashlib.sha256, hashlib.sha384,hashlib.sha512]

# hashlib constructors; Container.get calls each one to obtain a fresh hash
# object for every lookup.
hash_available = [hashlib.md5]


def get_hash(hash_function, x: str):
    """Hash a string with a hashlib object and return the digest as an int.

    Args:
        hash_function: A hashlib hash object (e.g. ``hashlib.md5()``). It is
            updated in place, so data fed to it earlier is part of the
            result.
        x: Text to hash; encoded with ``str.encode()`` defaults first.

    Returns:
        The digest interpreted as a big-endian unsigned integer.
    """
    payload = x.encode()
    hash_function.update(payload)
    # Parsing the hex digest in base 16 yields the big-endian integer value.
    return int(hash_function.hexdigest(), 16)


class Container():
    _THRESHOLD_FOR_CLEANING = eval(
        parse_config.get('detection', 'threshold_for_cleaning'))
    _CAPACITY_FOR_CLEANING = eval(
        parse_config.get('detection', 'capacity_for_cleaning'))

    def __init__(self):
        self.container = {}