Пример #1
0
def _graph_resolution(total_sec):
    """Pick the bucket size used to aggregate tweets over a time span.

    Spans of up to 1440 minutes are bucketed per minute, spans of up to
    2016 hours per hour, and anything longer per day.

    Returns a tuple ``(td, sec_divisor, stf, select_data)``:

    * ``td`` -- ``timedelta`` covering exactly one bucket.
    * ``sec_divisor`` -- number of seconds in one bucket.
    * ``stf`` -- ``strftime`` patterns for the 'date' and 'd' keys, used
      for buckets that are synthesised in Python.
    * ``select_data`` -- SQL expressions producing the same two keys,
      written with ``to_char`` when the default database engine is
      'postgresql_psycopg2' and with ``strftime`` otherwise.
    """
    use_postgres = (
        settings.DATABASES['default']['ENGINE'] == 'postgresql_psycopg2')
    total_min = total_sec / 60.0
    total_hours = total_min / 60.0

    if total_min <= 1440:
        td = timedelta(minutes=1)
        sec_divisor = 60
        stf = {"date": '%Y,%m,%d,%H,%M',
               "d": 'new Date(%Y,%m-1,%d,%H,%M)'}
        if use_postgres:
            select_data = {
                "d": "to_char(created_at, 'ne\"w\" \"D\"ate(YYYY,MM-1,DD, HH24,MI)')",
                "date": "to_char(created_at, 'YYYY,MM,DD,HH24,MI')",
            }
        else:
            select_data = {
                "d": "strftime('new Date(%%Y,%%m-1,%%d,%%H,%%M)', created_at)",
                "date": "strftime(('%%Y,%%m,%%d,%%H,%%M') , created_at)",
            }
    elif total_hours <= 2016:  # 24 hours x 28 days x 3 = about 3 months
        td = timedelta(hours=1)
        sec_divisor = 3600
        stf = {"date": '%Y,%m,%d,%H',
               "d": 'new Date(%Y,%m-1,%d,%H)'}
        if use_postgres:
            select_data = {
                "d": "to_char(created_at, 'ne\"w\" \"D\"ate(YYYY,MM-1,DD,HH24)')",
                "date": "to_char(created_at, 'YYYY,MM,DD,HH24')",
            }
        else:
            select_data = {
                "d": "strftime('new Date(%%Y,%%m-1,%%d,%%H)', created_at)",
                "date": "strftime(('%%Y,%%m,%%d,%%H') , created_at)",
            }
    else:
        td = timedelta(days=1)
        sec_divisor = 86400
        stf = {"date": '%Y,%m,%d',
               "d": 'new Date(%Y,%m-1,%d)'}
        if use_postgres:
            select_data = {
                "d": "to_char(created_at, 'ne\"w\" \"D\"ate(YYYY,MM-1,DD)')",
                "date": "to_char(created_at, 'YYYY,MM,DD')",
            }
        else:
            select_data = {
                "d": "strftime('new Date(%%Y,%%m-1,%%d)', created_at)",
                "date": "strftime(('%%Y,%%m,%%d') , created_at)",
            }
    return td, sec_divisor, stf, select_data


def create_graph_impl(request, event_id):
    """Render the tweet-volume graph for an event and return it as a string.

    Tweets attached to the event's keywords are counted per time bucket
    (minute, hour or day, depending on the length of the event), missing
    buckets are filled in with zero-count entries, peaks are detected with
    a ``MeanOutliers`` detector, and the annotated series is rendered with
    the 'twitinfo/create_graph.html' template.

    Parameters:
        request -- accepted for the caller's convenience; not used here.
        event_id -- primary key of the ``Event`` to graph.

    Returns:
        The rendered template as a string.

    NOTE(review): when the event has no start or end date the earliest or
    latest tweet is indexed with ``[0]``, which presumably raises
    ``IndexError`` if the event has no tweets at all -- confirm with callers.
    """
    e = Event.objects.get(id=event_id)
    tweets = Tweet.objects.filter(keyword__event=event_id)

    # Fall back to the span actually covered by the tweets when the event
    # itself does not define one.
    sdate = e.start_date
    edate = e.end_date
    if sdate is None:
        sdate = tweets.order_by('created_at')[0].created_at
    if edate is None:
        edate = tweets.order_by('-created_at')[0].created_at

    tdelta = edate - sdate
    total_sec = tdelta.seconds + tdelta.days * 24 * 3600
    td, sec_divisor, stf, select_data = _graph_resolution(total_sec)

    tweets = tweets.filter(created_at__gte=sdate)
    tweets = tweets.filter(created_at__lte=edate)
    tweets = tweets.extra(select=select_data).values('d', 'date')
    tweets = tweets.annotate(num_tweets=Count('tweet')).order_by('date')
    tweets = list(tweets)

    # The first bucket can never be a peak (it has no predecessor), so it
    # only needs its placeholders.  Doing this once up front also covers a
    # result with a single bucket, which the loop below never visits.
    if tweets:
        tweets[0]['title'] = 'null'
        tweets[0]['data'] = 'null'

    detector = MeanOutliers.factory()
    list_peaks = []
    i = 1
    # Walk the buckets, fill gaps, and detect peaks based on the mean
    # deviation function provided.  Each peak is saved in list_peaks as a
    # dictionary holding its start date, start frequency and peak date.
    while i < len(tweets):
        sdt_p = convert_date(tweets[i - 1]['date'])
        sdt_n = datetime(*map(int, tweets[i]['date'].split(',')))
        gap = sdt_n - sdt_p
        # Floor division keeps the gap an integer bucket count on both
        # Python 2 and Python 3.
        delta_d = (gap.seconds + gap.days * 24 * 3600) // sec_divisor

        # Insert one zero-count bucket for every bucket missing between
        # the previous entry and this one (no-op when delta_d <= 1).
        for j in range(delta_d - 1):
            sdt_p = sdt_p + td
            tweets.insert(i + j, {
                'title': 'null',
                'num_tweets': 0,
                'data': 'null',
                'children': 'null',
                'date': sdt_p.strftime(stf['date']),
                'd': sdt_p.strftime(stf['d']),
            })

        current_val = tweets[i]['num_tweets']
        previous_val = tweets[i - 1]['num_tweets']
        mdiv = detector(None, current_val, 1)
        if mdiv > 2.0 and current_val > previous_val and current_val > 10:
            start_freq = previous_val
            start_date = tweets[i - 1]['date']
            # Once a peak is detected, keep climbing until the maximum is
            # reached and remember its date.  The detector is still fed
            # every value because it needs the previous values to
            # calculate the mean.
            # NOTE(review): gaps are only filled by the outer loop, so
            # buckets stepped over during a climb are not gap-filled.
            while current_val > previous_val:
                tweets[i]['title'] = 'null'
                tweets[i]['data'] = 'null'
                if i + 1 < len(tweets):
                    i += 1
                    mdiv = detector(None, tweets[i]['num_tweets'], 1)
                    current_val = tweets[i]['num_tweets']
                    previous_val = tweets[i - 1]['num_tweets']
                    peak_date = tweets[i - 1]['date']
                else:
                    # Ran off the end of the series while still climbing.
                    peak_date = tweets[i]['date']
                    i += 1
                    break
            list_peaks.append({
                "start_date": start_date,
                "start_freq": start_freq,
                "peak_date": peak_date,
            })
        else:
            tweets[i]['title'] = 'null'
            tweets[i]['data'] = 'null'
            i += 1

    keywords = Keyword.objects.filter(event__id=event_id)
    words = [kw.key_word for kw in keywords]
    peaks = find_end_dates(tweets, list_peaks)

    tweets = annotate_peaks(peaks, tweets, event_id, words)
    try:
        children = e.children.order_by('start_date')
        tweets = peak_child_detection(children, list_peaks, tweets)
    except Exception:
        # Child-event annotation is best effort: on failure the graph is
        # rendered without it.  Only ``Exception`` is caught so that
        # SystemExit and KeyboardInterrupt still propagate.
        pass

    t = loader.get_template('twitinfo/create_graph.html')
    resp_string = t.render(Context({'tweets': tweets}))
    return resp_string
Пример #2
0
Файл: test.py Проект: badar/ssql
    
    print MeanOutliers.nummeandevs(None, 90, 2, 3)
    print MeanOutliers.nummeandevs(None, 20, 2, 3)
    print MeanOutliers.nummeandevs(None, 21, 2, 3)
    print MeanOutliers.nummeandevs(None, 23, 2, 3)
    print MeanOutliers.nummeandevs(None, 25, 2, 3)
    print MeanOutliers.nummeandevs(None, 15, 2, 3)
    print MeanOutliers.nummeandevs(None, 20, 2, 3)
    print MeanOutliers.nummeandevs(None, 25, 2, 3)
    print MeanOutliers.nummeandevs(None, 25, 2, 3)
    print MeanOutliers.nummeandevs(None, 25, 2, 3)
    print MeanOutliers.nummeandevs(None, 60, 2, 3)
    """
    meansum = 0.0
    meandevsum = 0.0
    count = 0
    std = 40
    incount = 0
    for i in range(1,10000):
        val = random.gauss(0,std)
        if abs(val) < 2*std:
            incount += 1
        meansum += 1.0*val
        count += 1
        meandevsum += abs(1.0*(val - meansum/count))
        MeanOutliers.nummeandevs(None, val, 2, 3)
    print meansum/count, meandevsum/count
    print "incount", incount
    print MeanOutliers.nummeandevs(None, 80, 2, 3)
    pass
Пример #3
0
def _graph_resolution(total_sec):
    """Pick the bucket size used to aggregate tweets over a time span.

    Spans of up to 1440 minutes are bucketed per minute, spans of up to
    2016 hours per hour, and anything longer per day.

    Returns a tuple ``(td, sec_divisor, stf, select_data)``:

    * ``td`` -- ``timedelta`` covering exactly one bucket.
    * ``sec_divisor`` -- number of seconds in one bucket.
    * ``stf`` -- ``strftime`` patterns for the 'date' and 'd' keys, used
      for buckets that are synthesised in Python.
    * ``select_data`` -- SQL expressions producing the same two keys,
      written with ``to_char`` when the default database engine is
      'postgresql_psycopg2' and with ``strftime`` otherwise.
    """
    use_postgres = (
        settings.DATABASES['default']['ENGINE'] == 'postgresql_psycopg2')
    total_min = total_sec / 60.0
    total_hours = total_min / 60.0

    if total_min <= 1440:
        td = timedelta(minutes=1)
        sec_divisor = 60
        stf = {"date": '%Y,%m,%d,%H,%M',
               "d": 'new Date(%Y,%m-1,%d,%H,%M)'}
        if use_postgres:
            select_data = {
                "d": "to_char(created_at, 'ne\"w\" \"D\"ate(YYYY,MM-1,DD, HH24,MI)')",
                "date": "to_char(created_at, 'YYYY,MM,DD,HH24,MI')",
            }
        else:
            select_data = {
                "d": "strftime('new Date(%%Y,%%m-1,%%d,%%H,%%M)', created_at)",
                "date": "strftime(('%%Y,%%m,%%d,%%H,%%M') , created_at)",
            }
    elif total_hours <= 2016:  # 24 hours x 28 days x 3 = about 3 months
        td = timedelta(hours=1)
        sec_divisor = 3600
        stf = {"date": '%Y,%m,%d,%H',
               "d": 'new Date(%Y,%m-1,%d,%H)'}
        if use_postgres:
            select_data = {
                "d": "to_char(created_at, 'ne\"w\" \"D\"ate(YYYY,MM-1,DD,HH24)')",
                "date": "to_char(created_at, 'YYYY,MM,DD,HH24')",
            }
        else:
            select_data = {
                "d": "strftime('new Date(%%Y,%%m-1,%%d,%%H)', created_at)",
                "date": "strftime(('%%Y,%%m,%%d,%%H') , created_at)",
            }
    else:
        td = timedelta(days=1)
        sec_divisor = 86400
        stf = {"date": '%Y,%m,%d',
               "d": 'new Date(%Y,%m-1,%d)'}
        if use_postgres:
            select_data = {
                "d": "to_char(created_at, 'ne\"w\" \"D\"ate(YYYY,MM-1,DD)')",
                "date": "to_char(created_at, 'YYYY,MM,DD')",
            }
        else:
            select_data = {
                "d": "strftime('new Date(%%Y,%%m-1,%%d)', created_at)",
                "date": "strftime(('%%Y,%%m,%%d') , created_at)",
            }
    return td, sec_divisor, stf, select_data


def create_graph_impl(request, event_id):
    """Render the tweet-volume graph for an event and return it as a string.

    Tweets attached to the event's keywords are counted per time bucket
    (minute, hour or day, depending on the length of the event), missing
    buckets are filled in with zero-count entries, peaks are detected with
    a ``MeanOutliers`` detector, and the annotated series is rendered with
    the 'twitinfo/create_graph.html' template.

    Parameters:
        request -- accepted for the caller's convenience; not used here.
        event_id -- primary key of the ``Event`` to graph.

    Returns:
        The rendered template as a string.

    NOTE(review): when the event has no start or end date the earliest or
    latest tweet is indexed with ``[0]``, which presumably raises
    ``IndexError`` if the event has no tweets at all -- confirm with callers.
    """
    e = Event.objects.get(id=event_id)
    tweets = Tweet.objects.filter(keyword__event=event_id)

    # Fall back to the span actually covered by the tweets when the event
    # itself does not define one.
    sdate = e.start_date
    edate = e.end_date
    if sdate is None:
        sdate = tweets.order_by('created_at')[0].created_at
    if edate is None:
        edate = tweets.order_by('-created_at')[0].created_at

    tdelta = edate - sdate
    total_sec = tdelta.seconds + tdelta.days * 24 * 3600
    td, sec_divisor, stf, select_data = _graph_resolution(total_sec)

    tweets = tweets.filter(created_at__gte=sdate)
    tweets = tweets.filter(created_at__lte=edate)
    tweets = tweets.extra(select=select_data).values('d', 'date')
    tweets = tweets.annotate(num_tweets=Count('tweet')).order_by('date')
    tweets = list(tweets)

    # The first bucket can never be a peak (it has no predecessor), so it
    # only needs its placeholders.  Doing this once up front also covers a
    # result with a single bucket, which the loop below never visits.
    if tweets:
        tweets[0]['title'] = 'null'
        tweets[0]['data'] = 'null'

    detector = MeanOutliers.factory()
    list_peaks = []
    i = 1
    # Walk the buckets, fill gaps, and detect peaks based on the mean
    # deviation function provided.  Each peak is saved in list_peaks as a
    # dictionary holding its start date, start frequency and peak date.
    while i < len(tweets):
        sdt_p = convert_date(tweets[i - 1]['date'])
        sdt_n = datetime(*map(int, tweets[i]['date'].split(',')))
        gap = sdt_n - sdt_p
        # Floor division keeps the gap an integer bucket count on both
        # Python 2 and Python 3.
        delta_d = (gap.seconds + gap.days * 24 * 3600) // sec_divisor

        # Insert one zero-count bucket for every bucket missing between
        # the previous entry and this one (no-op when delta_d <= 1).
        for j in range(delta_d - 1):
            sdt_p = sdt_p + td
            tweets.insert(i + j, {
                'title': 'null',
                'num_tweets': 0,
                'data': 'null',
                'children': 'null',
                'date': sdt_p.strftime(stf['date']),
                'd': sdt_p.strftime(stf['d']),
            })

        current_val = tweets[i]['num_tweets']
        previous_val = tweets[i - 1]['num_tweets']
        mdiv = detector(None, current_val, 1)
        if mdiv > 2.0 and current_val > previous_val and current_val > 10:
            start_freq = previous_val
            start_date = tweets[i - 1]['date']
            # Once a peak is detected, keep climbing until the maximum is
            # reached and remember its date.  The detector is still fed
            # every value because it needs the previous values to
            # calculate the mean.
            # NOTE(review): gaps are only filled by the outer loop, so
            # buckets stepped over during a climb are not gap-filled.
            while current_val > previous_val:
                tweets[i]['title'] = 'null'
                tweets[i]['data'] = 'null'
                if i + 1 < len(tweets):
                    i += 1
                    mdiv = detector(None, tweets[i]['num_tweets'], 1)
                    current_val = tweets[i]['num_tweets']
                    previous_val = tweets[i - 1]['num_tweets']
                    peak_date = tweets[i - 1]['date']
                else:
                    # Ran off the end of the series while still climbing.
                    peak_date = tweets[i]['date']
                    i += 1
                    break
            list_peaks.append({
                "start_date": start_date,
                "start_freq": start_freq,
                "peak_date": peak_date,
            })
        else:
            tweets[i]['title'] = 'null'
            tweets[i]['data'] = 'null'
            i += 1

    keywords = Keyword.objects.filter(event__id=event_id)
    words = [kw.key_word for kw in keywords]
    peaks = find_end_dates(tweets, list_peaks)

    tweets = annotate_peaks(peaks, tweets, event_id, words)
    try:
        children = e.children.order_by('start_date')
        tweets = peak_child_detection(children, list_peaks, tweets)
    except Exception:
        # Child-event annotation is best effort: on failure the graph is
        # rendered without it.  Only ``Exception`` is caught so that
        # SystemExit and KeyboardInterrupt still propagate.
        pass

    t = loader.get_template('twitinfo/create_graph.html')
    resp_string = t.render(Context({'tweets': tweets}))
    return resp_string