from datetime import datetime, timedelta

from django.conf import settings
from django.db.models import Count
from django.template import Context, loader

# Models (Event, Tweet, Keyword), the MeanOutliers detector, and the helpers
# convert_date, find_end_dates, annotate_peaks, and peak_child_detection are
# defined elsewhere in this app.


def create_graph_impl(request, event_id):
    e = Event.objects.get(id=event_id)
    sdate = e.start_date
    edate = e.end_date
    tweets = Tweet.objects.filter(keyword__event=event_id)
    # Fall back to the first/last tweet timestamps when the event has no
    # explicit start or end date.
    if sdate is None:
        sdate = tweets.order_by('created_at')[0].created_at
    if edate is None:
        edate = tweets.order_by('-created_at')[0].created_at
    tdelta = edate - sdate
    total_sec = tdelta.seconds + tdelta.days * 24 * 3600
    total_min = total_sec / 60.0
    total_hours = total_min / 60.0
    # Pick a bucket size for the time series: minutes for up to a day, hours
    # for up to about three months, days beyond that.  The 'd' column is
    # formatted as a literal JavaScript 'new Date(...)' expression (JavaScript
    # months are zero-based, hence the MM-1 arithmetic) and 'date' as a plain
    # comma-separated timestamp.
    if total_min <= 1440:
        td = timedelta(minutes=1)
        sec_divisor = 60
        stf = {"date": '%Y,%m,%d,%H,%M', "d": 'new Date(%Y,%m-1,%d,%H,%M)'}
        if settings.DATABASES['default']['ENGINE'] == 'postgresql_psycopg2':
            select_data = {
                "d": "to_char(created_at, 'ne\"w\" \"D\"ate(YYYY,MM-1,DD,HH24,MI)')",
                "date": "to_char(created_at, 'YYYY,MM,DD,HH24,MI')",
            }
        else:
            select_data = {
                "d": "strftime('new Date(%%Y,%%m-1,%%d,%%H,%%M)', created_at)",
                "date": "strftime('%%Y,%%m,%%d,%%H,%%M', created_at)",
            }
    elif total_hours <= 2016:  # 24 hours x 28 days x 3 = about 3 months
        td = timedelta(hours=1)
        sec_divisor = 3600
        stf = {"date": '%Y,%m,%d,%H', "d": 'new Date(%Y,%m-1,%d,%H)'}
        if settings.DATABASES['default']['ENGINE'] == 'postgresql_psycopg2':
            select_data = {
                "d": "to_char(created_at, 'ne\"w\" \"D\"ate(YYYY,MM-1,DD,HH24)')",
                "date": "to_char(created_at, 'YYYY,MM,DD,HH24')",
            }
        else:
            select_data = {
                "d": "strftime('new Date(%%Y,%%m-1,%%d,%%H)', created_at)",
                "date": "strftime('%%Y,%%m,%%d,%%H', created_at)",
            }
    else:
        td = timedelta(days=1)
        sec_divisor = 86400
        stf = {"date": '%Y,%m,%d', "d": 'new Date(%Y,%m-1,%d)'}
        if settings.DATABASES['default']['ENGINE'] == 'postgresql_psycopg2':
            select_data = {
                "d": "to_char(created_at, 'ne\"w\" \"D\"ate(YYYY,MM-1,DD)')",
                "date": "to_char(created_at, 'YYYY,MM,DD')",
            }
        else:
            select_data = {
                "d": "strftime('new Date(%%Y,%%m-1,%%d)', created_at)",
                "date": "strftime('%%Y,%%m,%%d', created_at)",
            }
    # Bucket the tweets: one row per time slot with its tweet count.
    tweets = tweets.filter(created_at__gte=sdate).filter(
        created_at__lte=edate).extra(select=select_data).values(
        'd', 'date').annotate(num_tweets=Count('tweet')).order_by('date')
    tweets = list(tweets)
    i = 1
    detector = MeanOutliers.factory()
    list_peaks = []
    if tweets:
        tweets[0]['title'] = 'null'
        tweets[0]['data'] = 'null'
    # Walk the series and detect peaks with the mean-deviation detector.  For
    # each peak, record its start date and peak date in a dictionary and
    # collect them in list_peaks.
    while i < len(tweets):
        sdt_p = convert_date(tweets[i - 1]['date'])
        sd_n = map(int, tweets[i]['date'].split(','))
        sdt_n = datetime(*sd_n)
        delta_d = sdt_n - sdt_p
        delta_d = (delta_d.seconds + delta_d.days * 24 * 3600) / sec_divisor
        # The database only returns slots that contain tweets, so fill any gap
        # between consecutive rows with zero-count slots.
        if delta_d != 1:
            j = 0
            while j < delta_d - 1:
                insert_tweet = {'title': 'null', 'num_tweets': 0,
                                'data': 'null', 'children': 'null'}
                sdt_p = sdt_p + td
                insert_tweet['date'] = sdt_p.strftime(stf['date'])
                insert_tweet['d'] = sdt_p.strftime(stf['d'])
                tweets.insert(i + j, insert_tweet)
                j += 1
        current_val = tweets[i]['num_tweets']
        previous_val = tweets[i - 1]['num_tweets']
        mdiv = detector(None, tweets[i]['num_tweets'], 1)
        if mdiv > 2.0 and current_val > previous_val and current_val > 10:
            start_freq = previous_val
            start_date = tweets[i - 1]['date']
            # Once a peak is detected, keep climbing until the maximum is
            # reached.  Keep feeding each value to the detector because it
            # requires previous values to maintain its running mean.
            while current_val > previous_val:
                tweets[i]['title'] = 'null'
                tweets[i]['data'] = 'null'
                if i + 1 < len(tweets):
                    i += 1
                    mdiv = detector(None, tweets[i]['num_tweets'], 1)
                    current_val = tweets[i]['num_tweets']
                    previous_val = tweets[i - 1]['num_tweets']
                    peak_date = tweets[i - 1]['date']
                else:
                    peak_date = tweets[i]['date']
                    i += 1
                    break
            d = {"start_date": start_date, "start_freq": start_freq,
                 "peak_date": peak_date}
            list_peaks.append(d)
        else:
            tweets[i]['title'] = 'null'
            tweets[i]['data'] = 'null'
            i += 1
    keywords = Keyword.objects.filter(event__id=event_id)
    words = [kw.key_word for kw in keywords]
    peaks = find_end_dates(tweets, list_peaks)
    tweets = annotate_peaks(peaks, tweets, event_id, words)
    # Sub-events are optional; ignore events without children.
    try:
        children = e.children.order_by('start_date')
        tweets = peak_child_detection(children, list_peaks, tweets)
    except Exception:
        pass
    t = loader.get_template('twitinfo/create_graph.html')
    resp_string = t.render(Context({'tweets': tweets}))
    return resp_string
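
# --- Illustrative sketch (not part of the view above) -----------------------
# The gap-filling loop in create_graph_impl only sees rows for time slots
# that actually contain tweets, so it synthesizes zero-count slots in between.
# The standalone sketch below reproduces that idea on a toy series of
# (datetime, count) pairs; the names fill_gaps and series are hypothetical
# and exist only for illustration.
def fill_gaps(series, td):
    """Insert (timestamp, 0) entries so consecutive timestamps differ by td."""
    filled = [series[0]]
    for ts, count in series[1:]:
        prev_ts = filled[-1][0]
        # Emit one zero-count slot per missing bucket before the real row.
        while ts - prev_ts > td:
            prev_ts = prev_ts + td
            filled.append((prev_ts, 0))
        filled.append((ts, count))
    return filled

# Example: two hourly buckets are missing between 01:00 and 04:00.
# fill_gaps([(datetime(2011, 1, 1, 1), 5), (datetime(2011, 1, 1, 4), 9)],
#           timedelta(hours=1))
# -> [(01:00, 5), (02:00, 0), (03:00, 0), (04:00, 9)]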
# Smoke test for the MeanOutliers mean-deviation detector: feed it a mostly
# flat series and check that the large values (90, 60) score high.
print MeanOutliers.nummeandevs(None, 90, 2, 3)
print MeanOutliers.nummeandevs(None, 20, 2, 3)
print MeanOutliers.nummeandevs(None, 21, 2, 3)
print MeanOutliers.nummeandevs(None, 23, 2, 3)
print MeanOutliers.nummeandevs(None, 25, 2, 3)
print MeanOutliers.nummeandevs(None, 15, 2, 3)
print MeanOutliers.nummeandevs(None, 20, 2, 3)
print MeanOutliers.nummeandevs(None, 25, 2, 3)
print MeanOutliers.nummeandevs(None, 25, 2, 3)
print MeanOutliers.nummeandevs(None, 25, 2, 3)
print MeanOutliers.nummeandevs(None, 60, 2, 3)
# Randomized check against a Gaussian stream, disabled by the string literal.
"""
import random

meansum = 0.0
meandevsum = 0.0
count = 0
std = 40
incount = 0
for i in range(1, 10000):
    val = random.gauss(0, std)
    if abs(val) < 2*std:
        incount += 1
    meansum += 1.0*val
    count += 1
    meandevsum += abs(1.0*(val - meansum/count))
    MeanOutliers.nummeandevs(None, val, 2, 3)
print meansum/count, meandevsum/count
print "incount", incount
print MeanOutliers.nummeandevs(None, 80, 2, 3)
"""
pass
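
# --- Illustrative sketch (not part of the test above) -----------------------
# The real MeanOutliers class is defined elsewhere; this is only a guess at
# the idea behind it: report how many mean absolute deviations a new value
# sits from the running mean of everything seen so far.  The class name
# RunningMeanDeviation and its interface are hypothetical, and the extra
# (2, 3) arguments of nummeandevs are not modeled here.
class RunningMeanDeviation(object):
    def __init__(self):
        self.total = 0.0      # running sum of values
        self.dev_total = 0.0  # running sum of |value - running mean|
        self.count = 0

    def __call__(self, value):
        self.count += 1
        self.total += value
        mean = self.total / self.count
        self.dev_total += abs(value - mean)
        mean_dev = self.dev_total / self.count
        if mean_dev == 0:
            return 0.0
        # Signed number of mean deviations the value lies from the mean.
        return (value - mean) / mean_dev

# Example: a burst of 90 after a flat stretch around 20 scores well above
# the 2.0 threshold that create_graph_impl uses to flag a peak.
# det = RunningMeanDeviation()
# for v in [20, 20, 21, 20, 90]:
#     print det(v)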