def run(self):
    """Poll the shared frequency table about once per second until stopped.

    On every pass, the entries of ``self.pointer_to_freqs`` whose timestamp
    is newer than ``self.time_intervals[-1]`` seconds ago are copied while
    ``self.freqs_lock`` is held.  The copy is then handed to
    ``post_rate_and_datetime`` for each interval in ``self.time_intervals``,
    stopping at the first interval whose post rate exceeds
    ``self.post_rate_threshold``.  The loop ends once ``self.running`` is
    falsy.
    """
    one_second = timedelta(seconds=1)
    while self.running:
        now = datetime.now()
        wake_time = now + one_second
        window_start = now - timedelta(seconds=self.time_intervals[-1])

        self.freqs_lock.acquire()
        try:
            copy_of_freqs = []
            for created_at, freq in self.pointer_to_freqs.items():
                # Parse each key once rather than once for the filter and
                # once more for the stored pair.
                created = tweet.datetime_from_tweet_created_at(created_at)
                if created > window_start:
                    copy_of_freqs.append((created, freq))
        finally:
            # Always release, so an exception while copying cannot leave
            # the lock held.
            self.freqs_lock.release()
        print('len - %d' % len(copy_of_freqs))

        if copy_of_freqs:
            for interval in self.time_intervals:
                pr, _ = post_rate_and_datetime(copy_of_freqs, interval)
                if pr > self.post_rate_threshold:
                    # NOTE(review): nothing is done with a threshold hit
                    # yet; the original only set an unused local here.
                    break

        # Sleep for whatever is left of the one-second budget.
        sleep_seconds = (wake_time - datetime.now()).total_seconds()
        if sleep_seconds > 0:
            sleep(sleep_seconds)
def run(self):
    """Run the once-per-second monitoring loop while ``self.running`` is truthy.

    Each iteration:

    1. copies, under ``self.freqs_lock``, every ``self.pointer_to_freqs``
       entry whose parsed timestamp lies after
       ``now - self.time_intervals[-1]`` seconds;
    2. prints the size of that copy;
    3. if the copy is non-empty, calls ``post_rate_and_datetime`` for each
       interval in ``self.time_intervals`` until one post rate exceeds
       ``self.post_rate_threshold``;
    4. sleeps until one second has passed since the iteration started.
    """
    one_second = timedelta(seconds=1)
    while self.running:
        now = datetime.now()
        wake_time = now + one_second
        window_start = now - timedelta(seconds=self.time_intervals[-1])

        self.freqs_lock.acquire()
        try:
            copy_of_freqs = []
            for key, value in self.pointer_to_freqs.items():
                # One conversion per key; the original converted twice.
                stamp = tweet.datetime_from_tweet_created_at(key)
                if stamp > window_start:
                    copy_of_freqs.append((stamp, value))
        finally:
            # The release must happen even when a conversion raises,
            # otherwise the lock stays held after the error.
            self.freqs_lock.release()
        print('len - %d' % len(copy_of_freqs))

        if copy_of_freqs:
            for interval in self.time_intervals:
                pr, _ = post_rate_and_datetime(copy_of_freqs, interval)
                if pr > self.post_rate_threshold:
                    # NOTE(review): placeholder branch -- the original
                    # assigned an unused local and left the loop.
                    break

        sleep_seconds = (wake_time - datetime.now()).total_seconds()
        if sleep_seconds > 0:
            sleep(sleep_seconds)
def get_age_seconds(self):
    """Return the seconds between the earliest and latest timestamp.

    The keys of ``self.frequencies`` are converted with
    ``tweet.datetime_from_tweet_created_at``.

    Returns:
        The span in seconds as a float, or ``0.0`` when ``self.frequencies``
        is empty (the previous implementation raised ``IndexError`` there).
    """
    times = [tweet.datetime_from_tweet_created_at(s) for s in self.frequencies]
    if not times:
        return 0.0
    # Only the two extremes are needed, so a full sort is unnecessary.
    return (max(times) - min(times)).total_seconds()
def get_window(freqs, seconds, window_end):
    """Return the entries of ``freqs`` inside a trailing time window.

    Args:
        freqs: iterable of ``(created_at, value)`` pairs whose first item is
            accepted by ``tweet.datetime_from_tweet_created_at``.
        seconds: window length in seconds.
        window_end: end of the window.

    Returns:
        The result of ``get_window_of_sorted_freqs`` for the pairs ordered
        by their parsed timestamp.
    """
    # Sort by the parsed timestamp but pass the raw pairs on:
    # get_window_of_sorted_freqs converts the keys itself, so converting
    # them here as well (as the original did) fed it already-converted keys.
    sorted_freqs = sorted(
        freqs, key=lambda pair: tweet.datetime_from_tweet_created_at(pair[0]))
    return get_window_of_sorted_freqs(sorted_freqs, seconds, window_end)
def post_rate_and_datetime1(freqs, seconds, window_end):
    """Build the window of ``freqs`` entries ending at ``window_end``.

    NOTE(review): nothing is returned -- ``window`` is computed and then
    dropped, so callers always receive ``None``.  The definition looks
    unfinished; confirm what it is meant to return.
    """
    # Convert every key with tweet.datetime_from_tweet_created_at.
    freqs_with_datetime = [(tweet.datetime_from_tweet_created_at(k), v) for k,v in freqs]
    # Order the pairs by the converted key.
    sorted_freqs = sorted(freqs_with_datetime, key=lambda (k,v):k)
    # NOTE(review): get_window runs the keys through
    # tweet.datetime_from_tweet_created_at as well, so they are converted
    # more than once -- confirm the helper accepts datetime input.
    window = get_window(sorted_freqs, seconds, window_end)
def get_window(freqs, seconds, window_end):
    """Order ``freqs`` chronologically and cut out a trailing time window.

    Args:
        freqs: iterable of ``(created_at, value)`` pairs; ``created_at`` must
            be accepted by ``tweet.datetime_from_tweet_created_at``.
        seconds: length of the window in seconds.
        window_end: end of the window.

    Returns:
        Whatever ``get_window_of_sorted_freqs`` returns for the ordered
        pairs.
    """
    def parsed_key(pair):
        # Used for ordering only; the pairs themselves stay unconverted.
        return tweet.datetime_from_tweet_created_at(pair[0])

    # The original converted the keys before delegating, but
    # get_window_of_sorted_freqs converts them again, so it received
    # already-converted keys.  Passing the raw pairs avoids that.
    sorted_freqs = sorted(freqs, key=parsed_key)
    return get_window_of_sorted_freqs(sorted_freqs, seconds, window_end)
def post_rate_and_datetime1(freqs, seconds, window_end):
    """Compute the window of ``freqs`` entries ending at ``window_end``.

    NOTE(review): the function has no ``return`` -- ``window`` is assigned
    and discarded, so the result is always ``None``.  It appears to be
    incomplete; confirm the intended return value.
    """
    # Pair every value with its key converted by
    # tweet.datetime_from_tweet_created_at.
    freqs_with_datetime = [(tweet.datetime_from_tweet_created_at(k), v) for k, v in freqs]
    # Sort the pairs by the converted key.
    sorted_freqs = sorted(freqs_with_datetime, key=lambda (k, v): k)
    # NOTE(review): get_window also applies
    # tweet.datetime_from_tweet_created_at to the keys, so each key is
    # converted more than once -- verify the helper tolerates datetimes.
    window = get_window(sorted_freqs, seconds, window_end)
def get_post_rates(tweet_freqs, t, intervals=(10, 20, 30, 60)):
    """Collect window mid-points whose post rate exceeds ``t``.

    For every entry of ``tweet_freqs`` (visited in sorted key order) a window
    ending one second before that entry's timestamp is examined for each
    interval, in the order given.  Windows that do not hold exactly
    ``interval`` entries are skipped.  The first interval whose
    ``post_rate`` is above ``t`` contributes the first field of the window's
    middle entry; the remaining intervals are then not tried for that entry.

    Args:
        tweet_freqs: mapping of created-at key to frequency.
        t: post-rate threshold.
        intervals: window lengths in seconds.  Defaults to the values that
            were previously hard-coded in the body.

    Returns:
        A set of the collected mid-point values.
    """
    sorted_freqs = sorted(tweet_freqs.items())
    one_second = timedelta(seconds=1)
    hits = set()
    for created_at, _ in sorted_freqs:
        window_end = T.datetime_from_tweet_created_at(created_at) - one_second
        for interval in intervals:
            window = get_window_of_sorted_freqs(
                sorted_freqs, interval, window_end)
            # Compare by value: ``is not`` on ints only behaved correctly
            # thanks to CPython's small-integer caching.
            if len(window) != interval:
                continue
            if post_rate(window) > t:
                hits.add(window[len(window) // 2][0])
                break
    return hits
def get_post_rates(tweet_freqs, t, intervals=(10, 20, 30, 60)):
    """Return the mid-points of windows whose post rate is above ``t``.

    Entries of ``tweet_freqs`` are processed in sorted key order.  For each
    one, windows ending one second before its timestamp are built with
    ``get_window_of_sorted_freqs`` for every interval in turn.  A window is
    only considered when it contains exactly ``interval`` entries; the first
    such window with ``post_rate(window) > t`` adds the first field of its
    middle entry to the result and ends the search for that entry.

    Args:
        tweet_freqs: mapping of created-at key to frequency.
        t: post-rate threshold.
        intervals: window lengths in seconds, tried in order.  The default
            matches the list that used to be hard-coded.

    Returns:
        A set of mid-point values.
    """
    one_second = timedelta(seconds=1)
    sorted_freqs = sorted(tweet_freqs.items())
    mid_points = set()
    for key, _ in sorted_freqs:
        window_end = T.datetime_from_tweet_created_at(key) - one_second
        for interval in intervals:
            window = get_window_of_sorted_freqs(
                sorted_freqs, interval, window_end)
            size = len(window)
            # ``!=`` instead of ``is not``: identity comparison of ints is
            # an implementation detail, not a value check.
            if size != interval:
                continue
            if post_rate(window) > t:
                mid_points.add(window[size // 2][0])
                break
    return mid_points
def get_window_of_sorted_freqs(freqs, seconds, window_end):
    """Select the entries that fall in ``(window_end - seconds, window_end]``.

    Every key of ``freqs`` is converted with
    ``tweet.datetime_from_tweet_created_at``.  The result is a list of
    ``(datetime, value)`` pairs in input order, limited to those whose
    datetime is after the window start and not after ``window_end``.
    """
    parsed = []
    for created_at, value in freqs:
        parsed.append((tweet.datetime_from_tweet_created_at(created_at), value))
    earliest = window_end - timedelta(seconds=seconds)
    return [pair for pair in parsed if earliest < pair[0] <= window_end]
def offline_sliding_window(freqs, *intervals):
    """Print post rates for every interval at every timestamp in ``freqs``.

    Entries are visited in key order.  For each entry and each interval,
    ``post_rate_and_datetime`` is called with the sorted entries, the
    interval and the entry's parsed timestamp; a line with the interval and
    the two returned values is printed when both are truthy.

    Args:
        freqs: mapping of created-at key to frequency.
        *intervals: interval values forwarded to ``post_rate_and_datetime``.
    """
    # Materialise the items once.  The original handed
    # post_rate_and_datetime the ``iteritems()`` iterator that ``sorted``
    # had already exhausted, i.e. an empty sequence.
    sorted_freqs = sorted(freqs.items(), key=lambda pair: pair[0])
    for created_at, _ in sorted_freqs:
        window_end = tweet.datetime_from_tweet_created_at(created_at)
        for interval in intervals:
            # Keep the returned datetime in its own name; reusing the
            # window-end variable overwrote it for the remaining intervals.
            pr, dt = post_rate_and_datetime(sorted_freqs, interval, window_end)
            if pr and dt:
                # NOTE(review): ``%d`` needs numbers; if ``dt`` is a
                # datetime this raises TypeError -- confirm what
                # post_rate_and_datetime returns.
                print('%d %d %d' % (interval, pr, dt))
def get_window_of_sorted_freqs(freqs, seconds, window_end):
    """Return the ``(datetime, value)`` pairs lying inside a trailing window.

    Keys of ``freqs`` are converted with
    ``tweet.datetime_from_tweet_created_at``.  A pair is kept when its
    datetime is strictly after ``window_end - seconds`` and at or before
    ``window_end``; input order is preserved.
    """
    converted = [
        (tweet.datetime_from_tweet_created_at(key), value)
        for key, value in freqs
    ]
    window_start = window_end - timedelta(seconds=seconds)
    inside = []
    for moment, value in converted:
        if window_start < moment <= window_end:
            inside.append((moment, value))
    return inside
def offline_sliding_window(freqs, *intervals):
    """Walk ``freqs`` in key order and print the post rate per interval.

    For each entry, its key is parsed with
    ``tweet.datetime_from_tweet_created_at`` and used as the third argument
    to ``post_rate_and_datetime`` for every interval.  When both returned
    values are truthy, the interval and the two values are printed on one
    line.

    Args:
        freqs: mapping of created-at key to frequency.
        *intervals: interval values forwarded to ``post_rate_and_datetime``.
    """
    # ``sorted`` consumes an ``iteritems()`` iterator, so the original
    # passed an exhausted iterator downstream.  Use the sorted list instead.
    sorted_freqs = sorted(freqs.items(), key=lambda item: item[0])
    for key, _ in sorted_freqs:
        entry_time = tweet.datetime_from_tweet_created_at(key)
        for interval in intervals:
            # ``entry_time`` must stay fixed across intervals; the original
            # rebound it to the returned datetime on every call.
            pr, dt = post_rate_and_datetime(sorted_freqs, interval, entry_time)
            if pr and dt:
                # NOTE(review): ``%d`` fails with TypeError for a datetime
                # ``dt`` -- check post_rate_and_datetime's return types.
                print('%d %d %d' % (interval, pr, dt))
def get_post_rate(self):
    """Return the ratio of later to earlier tweet frequency.

    The timestamps in ``self.frequencies`` are split at the midpoint between
    the earliest and the latest one.  The result is the summed frequency
    after the midpoint divided by the summed frequency at or before it.

    Returns:
        ``0`` when ``self.frequencies`` is empty, when the timestamps span
        less than ten seconds, when ``self.tweet_vects`` holds fewer than
        ten items, or when the earlier half sums to zero or less; otherwise
        the ratio as a float.
    """
    timed = [(tweet.datetime_from_tweet_created_at(k), v)
             for k, v in self.frequencies.items()]
    # An empty table used to raise IndexError on the [-1] / [0] lookups.
    if not timed or len(self.tweet_vects) < 10:
        return 0
    earliest = min(stamp for stamp, _ in timed)
    latest = max(stamp for stamp, _ in timed)
    span = latest - earliest
    if span < timedelta(seconds=10):
        return 0
    # The original subtracted the whole span, which made the "midpoint"
    # equal to the earliest timestamp; halve the span instead.
    mid_time = latest - span // 2
    first_half = sum(freq for stamp, freq in timed if stamp <= mid_time)
    second_half = sum(freq for stamp, freq in timed if stamp > mid_time)
    return float(second_half) / first_half if first_half > 0 else 0
def get_post_rate(self):
    """Compare the frequency of the later half of the data to the earlier.

    ``self.frequencies`` keys are parsed with
    ``tweet.datetime_from_tweet_created_at`` and divided at the midpoint of
    the covered time span.  Frequencies after the midpoint are summed and
    divided by the sum of those at or before it.

    Returns:
        The ratio as a float, or ``0`` when there is no data, the data spans
        less than ten seconds, ``self.tweet_vects`` has fewer than ten
        items, or the earlier sum is not positive.
    """
    if len(self.tweet_vects) < 10:
        return 0
    timed = [(tweet.datetime_from_tweet_created_at(key), count)
             for key, count in self.frequencies.items()]
    if not timed:
        # Previously an IndexError from indexing the empty sorted list.
        return 0
    stamps = [stamp for stamp, _ in timed]
    first, last = min(stamps), max(stamps)
    span = last - first
    if span < timedelta(seconds=10):
        return 0
    # Halve the span: ``last - (last - first)`` is just ``first``, so the
    # original never split the data at the middle.
    mid_time = last - span // 2
    first_half = sum(count for stamp, count in timed if stamp <= mid_time)
    second_half = sum(count for stamp, count in timed if stamp > mid_time)
    if first_half > 0:
        return float(second_half) / first_half
    return 0
def get_age_seconds(self):
    """Return how many seconds separate the oldest and newest timestamp.

    Timestamps are the keys of ``self.frequencies`` converted with
    ``tweet.datetime_from_tweet_created_at``.

    Returns:
        The difference in seconds as a float; ``0.0`` when
        ``self.frequencies`` has no keys (previously an ``IndexError``).
    """
    stamps = [tweet.datetime_from_tweet_created_at(key)
              for key in self.frequencies]
    if not stamps:
        return 0.0
    # min/max give the same extremes as sorting and taking both ends.
    oldest, newest = min(stamps), max(stamps)
    return (newest - oldest).total_seconds()