def extractHardFeatures(DIR):
    """Generate the activity, weekend, location and tag feature files.

    ``DIR`` is used as a plain string prefix (paths are built with ``+``),
    so it has to end with a path separator already.

    Work done, in this order:

    1. Read ``posts/users_posts_times.csv`` and write the result of
       ``useractivity.extractTimeIntervalFeatures`` to
       ``users/temp_features.csv``.
    2. Write the transformed per-user activity table to
       ``users/temporal_user_activities.csv``.
    3. Read ``posts/quest_stats.csv`` and write the result of
       ``timefeatures.dateWeekend`` to ``posts/quest_weekend.csv``.
    4. Call ``extractLocs(DIR)``.
    5. Read ``posts/quest_stats.csv`` again and pass it to
       ``extractTagFeatures``.

    Nothing is returned; the results are the files written and whatever the
    called helpers do.
    """
    # TODO (kept from the original source): "uncomment" -- the original note
    # does not say which statement it refers to.
    user_posts = pd.read_csv(DIR + 'posts/users_posts_times.csv')

    # The date reported by theLatestPostTime is handed to
    # extractDayWeekActivity to pick out the last-week and last-day posts.
    year, month, day = useractivity.theLatestPostTime(user_posts)
    week_posts, day_posts = useractivity.extractDayWeekActivity(
        user_posts, year, month, day)
    interval_features = useractivity.extractTimeIntervalFeatures(
        week_posts, day_posts)
    # Single-argument parenthesised form prints the same text whether or not
    # print is a statement.
    print('extracted Q_LAST_WEEK, A_LAST_WEEK, P_NUM_LAST_WEEK')
    interval_features.to_csv(DIR + 'users/temp_features.csv', index=False)

    # Per-user activity table, computed from the same posts frame.
    useractivity.userActivityTransform(
        useractivity.usersActivityFast(user_posts)
    ).to_csv(DIR + 'users/temporal_user_activities.csv', index=False)

    # Features describing when a question was asked.
    question_stats = pd.read_csv(DIR + 'posts/quest_stats.csv')
    timefeatures.dateWeekend(question_stats).to_csv(
        DIR + 'posts/quest_weekend.csv', index=False)

    extractLocs(DIR)

    # quest_stats.csv is deliberately loaded a second time instead of reusing
    # the frame above, so the tag step sees a fresh copy from disk.
    extractTagFeatures(DIR, pd.read_csv(DIR + 'posts/quest_stats.csv'))
def dataCut(data, time_cutoff):
    """Return the newest questions of ``data``, stopping at ``time_cutoff``.

    ``data`` is sorted by its ``'TimeAsked'`` column in descending order.
    The rows are then scanned from the top until the first one whose
    ``tmpf.parseTime(TimeAsked)`` is strictly less than ``time_cutoff``;
    the rows before that position are kept.

    Args:
        data: pandas DataFrame with a ``'TimeAsked'`` column whose values are
            accepted by ``tmpf.parseTime``.
        time_cutoff: value comparable (``<``) with what ``tmpf.parseTime``
            returns.

    Returns:
        The sorted rows from position 1 up to, but not including, the first
        row older than ``time_cutoff``. If no row is older than the cutoff,
        every row from position 1 onwards is returned (the original code
        returned an empty frame in that case because ``0`` doubled as the
        "not found" marker).
    """
    sort_columns = ['TimeAsked']
    # DataFrame.sort was removed from pandas in favour of sort_values; use
    # the new name when it exists and fall back for very old installations.
    if hasattr(data, 'sort_values'):
        data_sorted = data.sort_values(sort_columns, ascending=False)
    else:
        data_sorted = data.sort(sort_columns, ascending=False)

    # Default to "keep everything" so that a cutoff older than all rows does
    # not collapse the result to an empty slice.
    cutoff_index = len(data_sorted)
    for i, timestamp in enumerate(data_sorted['TimeAsked']):
        time_asked = tmpf.parseTime(timestamp)
        if time_asked < time_cutoff:
            cutoff_index = i
            # Same text as the former `print i, 'questions to cut'`, but
            # valid whether print is a statement or a function.
            print('%s questions to cut' % cutoff_index)
            break

    # NOTE(review): the slice starts at 1, so the single newest row is always
    # dropped. This is kept exactly as in the original because callers may
    # rely on it, but it looks like an off-by-one -- confirm and change to
    # `[:cutoff_index]` if the newest question should be included.
    data_cut = data_sorted[1:cutoff_index]
    return data_cut
def dataCut(data, time_cutoff):
    """Return the newest questions of ``data``, stopping at ``time_cutoff``.

    ``data`` is sorted by its ``'TimeAsked'`` column in descending order.
    The rows are then scanned from the top until the first one whose
    ``tmpf.parseTime(TimeAsked)`` is strictly less than ``time_cutoff``;
    the rows before that position are kept.

    Args:
        data: pandas DataFrame with a ``'TimeAsked'`` column whose values are
            accepted by ``tmpf.parseTime``.
        time_cutoff: value comparable (``<``) with what ``tmpf.parseTime``
            returns.

    Returns:
        The sorted rows from position 1 up to, but not including, the first
        row older than ``time_cutoff``. If no row is older than the cutoff,
        every row from position 1 onwards is returned (the original code
        returned an empty frame in that case because ``0`` doubled as the
        "not found" marker).
    """
    sort_columns = ['TimeAsked']
    # DataFrame.sort was removed from pandas in favour of sort_values; use
    # the new name when it exists and fall back for very old installations.
    if hasattr(data, 'sort_values'):
        data_sorted = data.sort_values(sort_columns, ascending=False)
    else:
        data_sorted = data.sort(sort_columns, ascending=False)

    # Default to "keep everything" so that a cutoff older than all rows does
    # not collapse the result to an empty slice.
    cutoff_index = len(data_sorted)
    for i, timestamp in enumerate(data_sorted['TimeAsked']):
        time_asked = tmpf.parseTime(timestamp)
        if time_asked < time_cutoff:
            cutoff_index = i
            # Same text as the former `print i, 'questions to cut'`, but
            # valid whether print is a statement or a function.
            print('%s questions to cut' % cutoff_index)
            break

    # NOTE(review): the slice starts at 1, so the single newest row is always
    # dropped. This is kept exactly as in the original because callers may
    # rely on it, but it looks like an off-by-one -- confirm and change to
    # `[:cutoff_index]` if the newest question should be included.
    data_cut = data_sorted[1:cutoff_index]
    return data_cut