Exemplo n.º 1
0
def extractHardFeatures(DIR):
    """Run the feature-extraction steps over the CSV files under ``DIR``.

    Steps, in order:

    1. Read ``posts/users_posts_times.csv``, extract the last-week and
       last-day posts, and write the resulting interval features to
       ``users/temp_features.csv``.
    2. Compute user activities from the same posts and write their
       transformed form to ``users/temporal_user_activities.csv``.
    3. Read ``posts/quest_stats.csv``, pass it through
       ``timefeatures.dateWeekend`` and write ``posts/quest_weekend.csv``.
    4. Call ``extractLocs(DIR)``.
    5. Read ``posts/quest_stats.csv`` again and hand it to
       ``extractTagFeatures``.

    Args:
        DIR: Base directory prefix. It is concatenated directly with the
            relative file names, so it must already end with a path
            separator (or be empty).

    Returns:
        None. All results are written to CSV files.
    """
    # TODO: the original note here only said "uncomment"; what it referred
    # to is unclear -- verify and remove.
    posts = pd.read_csv(DIR + 'posts/users_posts_times.csv')

    # The latest post time is passed on as the reference date for the
    # last-week / last-day extraction.
    year, month, day = useractivity.theLatestPostTime(posts)
    posts_week, posts_day = useractivity.extractDayWeekActivity(
        posts, year, month, day)
    df_result = useractivity.extractTimeIntervalFeatures(posts_week, posts_day)
    # Call form with a single argument prints the same text on Python 2 and 3.
    print('extracted Q_LAST_WEEK, A_LAST_WEEK, P_NUM_LAST_WEEK')
    df_result.to_csv(DIR + 'users/temp_features.csv', index=False)

    activities = useractivity.usersActivityFast(posts)
    df_tr = useractivity.userActivityTransform(activities)
    df_tr.to_csv(DIR + 'users/temporal_user_activities.csv', index=False)

    # Extract features describing when a question was asked.
    df = pd.read_csv(DIR + 'posts/quest_stats.csv')
    q_wknd = timefeatures.dateWeekend(df)
    q_wknd.to_csv(DIR + 'posts/quest_weekend.csv', index=False)

    extractLocs(DIR)

    # quest_stats.csv is read afresh instead of reusing ``df``: whether
    # dateWeekend modifies its argument is not visible from here.
    df_tags = pd.read_csv(DIR + 'posts/quest_stats.csv')
    extractTagFeatures(DIR, df_tags)
Exemplo n.º 2
0
def dataCut(data, time_cutoff):
    """Return the rows of ``data`` that are not older than ``time_cutoff``.

    The frame is sorted by its ``TimeAsked`` column in descending order and
    truncated just before the first row whose parsed timestamp is earlier
    than ``time_cutoff``.

    Args:
        data: pandas DataFrame with a ``TimeAsked`` column whose values
            ``tmpf.parseTime`` accepts.
        time_cutoff: Lower bound; must be comparable (``<``) with the values
            returned by ``tmpf.parseTime``.

    Returns:
        DataFrame with the leading rows of the sorted frame, up to but not
        including the first row older than the cutoff. When no row is older
        than the cutoff, every row is returned.
    """
    # DataFrame.sort was removed in pandas 0.20; only fall back to it on old
    # pandas versions that do not provide sort_values yet.
    if hasattr(data, 'sort_values'):
        data_sorted = data.sort_values(['TimeAsked'], ascending=False)
    else:
        data_sorted = data.sort(['TimeAsked'], ascending=False)

    # Default to keeping everything, so "no row older than the cutoff" is not
    # confused with "the very first row is already older than the cutoff".
    data_entries = len(data_sorted)
    for i, timestamp in enumerate(data_sorted['TimeAsked']):
        if tmpf.parseTime(timestamp) < time_cutoff:
            data_entries = i
            # Same text as the Python 2 print statement, valid on Python 3 too.
            print('%d questions to cut' % data_entries)
            break

    # Positional slice starting at row 0 so the first sorted row is kept.
    return data_sorted.iloc[:data_entries]
Exemplo n.º 3
0
def dataCut(data, time_cutoff):
    """Return the rows of ``data`` that are not older than ``time_cutoff``.

    The frame is sorted by its ``TimeAsked`` column in descending order and
    truncated just before the first row whose parsed timestamp is earlier
    than ``time_cutoff``.

    Args:
        data: pandas DataFrame with a ``TimeAsked`` column whose values
            ``tmpf.parseTime`` accepts.
        time_cutoff: Lower bound; must be comparable (``<``) with the values
            returned by ``tmpf.parseTime``.

    Returns:
        DataFrame with the leading rows of the sorted frame, up to but not
        including the first row older than the cutoff. When no row is older
        than the cutoff, every row is returned.
    """
    # DataFrame.sort was removed in pandas 0.20; only fall back to it on old
    # pandas versions that do not provide sort_values yet.
    if hasattr(data, 'sort_values'):
        data_sorted = data.sort_values(['TimeAsked'], ascending=False)
    else:
        data_sorted = data.sort(['TimeAsked'], ascending=False)

    # Default to keeping everything, so "no row older than the cutoff" is not
    # confused with "the very first row is already older than the cutoff".
    data_entries = len(data_sorted)
    for i, timestamp in enumerate(data_sorted['TimeAsked']):
        if tmpf.parseTime(timestamp) < time_cutoff:
            data_entries = i
            # Same text as the Python 2 print statement, valid on Python 3 too.
            print('%d questions to cut' % data_entries)
            break

    # Positional slice starting at row 0 so the first sorted row is kept.
    return data_sorted.iloc[:data_entries]