def extractHardFeatures(DIR):
    """Run the file-based feature-extraction steps and write their CSV outputs.

    All paths are built by plain string concatenation (``DIR + 'posts/...'``),
    so ``DIR`` must already end with a path separator.

    Reads:
        DIR + 'posts/users_posts_times.csv'
        DIR + 'posts/quest_stats.csv' (read twice, see the note below)

    Writes:
        DIR + 'users/temp_features.csv'
        DIR + 'users/temporal_user_activities.csv'
        DIR + 'posts/quest_weekend.csv'

    Also calls ``extractLocs(DIR)`` and ``extractTagFeatures(DIR, df_tags)``;
    whatever files those produce is not visible from this function.

    Returns None.
    """
    # TODO(review): the original carried a bare "TODO uncomment" marker here
    # without saying what should be re-enabled -- confirm and resolve.
    posts = pd.read_csv(DIR + 'posts/users_posts_times.csv')

    # Time-window features, anchored on the date of the latest post.
    year, month, day = useractivity.theLatestPostTime(posts)
    # Extract last-week and last-day posts.
    posts_week, posts_day = useractivity.extractDayWeekActivity(
        posts, year, month, day)
    df_result = useractivity.extractTimeIntervalFeatures(posts_week, posts_day)
    # Parenthesised single-argument form prints the same text on Python 2 and
    # Python 3; the bare print statement is a syntax error on Python 3.
    print('extracted Q_LAST_WEEK, A_LAST_WEEK, P_NUM_LAST_WEEK')
    df_result.to_csv(DIR + 'users/temp_features.csv', index=False)

    # Per-user activity, computed over the full posts table.
    activities = useractivity.usersActivityFast(posts)
    df_tr = useractivity.userActivityTransform(activities)
    df_tr.to_csv(DIR + 'users/temporal_user_activities.csv', index=False)

    # Extract features describing when a question was asked.
    df = pd.read_csv(DIR + 'posts/quest_stats.csv')
    q_wknd = timefeatures.dateWeekend(df)
    q_wknd.to_csv(DIR + 'posts/quest_weekend.csv', index=False)

    extractLocs(DIR)

    # quest_stats.csv is deliberately re-read rather than reusing ``df``:
    # dateWeekend() or extractLocs() may have modified the frame or the file
    # (not visible here), so the original load-from-disk order is preserved.
    df_tags = pd.read_csv(DIR + 'posts/quest_stats.csv')
    extractTagFeatures(DIR, df_tags)
def extractUserActivityFeatures(c, DIR):
    """Dump raw user activity to CSV and return the posts built from it.

    Creates the ``DIR + 'users'`` directory when it does not exist, stores the
    rows returned by ``query.usersActivity(c)`` (with the columns named in
    ``header['users_activity']``) in ``DIR + 'posts/users_activity.csv'``,
    and converts that table into posts via ``useractivity.makePosts``.

    Paths are built by plain concatenation, so ``DIR`` must end with a path
    separator. Only the 'users' directory is created here; the 'posts'
    directory has to exist already.

    Args:
        c: handle passed straight to ``query.usersActivity`` (presumably a
            database cursor or connection -- confirm against the caller).
        DIR: output directory prefix.

    Returns:
        The posts frame restricted to rows whose 'UserId' is not -1
        (-1 presumably marks posts without a known user -- confirm).
    """
    users_dir = DIR + 'users'
    if not os.path.exists(users_dir):
        os.makedirs(users_dir)

    activity_rows = query.usersActivity(c)
    activity_frame = pd.DataFrame(activity_rows,
                                  columns=header['users_activity'])
    activity_frame.to_csv(DIR + 'posts/users_activity.csv', index=False)

    all_posts = useractivity.makePosts(activity_frame)
    has_known_user = all_posts['UserId'] != -1
    return all_posts[has_known_user]