示例#1
0
def extractHardFeatures(DIR):
    """Run the file-based feature extraction steps rooted at DIR.

    Inputs read (paths are built as ``DIR + '<relative path>'``, so DIR is
    expected to already end with a path separator):
      * ``posts/users_posts_times.csv``
      * ``posts/quest_stats.csv``

    Outputs written:
      * ``users/temp_features.csv``            - extractTimeIntervalFeatures result
      * ``users/temporal_user_activities.csv`` - userActivityTransform result
      * ``posts/quest_weekend.csv``            - dateWeekend result

    Finally delegates to extractLocs(DIR) and extractTagFeatures(DIR, df).
    Returns None.
    """
    # TODO uncomment  (note kept from the original author; the step it refers
    # to is not identifiable from this function alone)

    posts = pd.read_csv(DIR + 'posts/users_posts_times.csv')

    # The latest post date is passed on to select the last-week and
    # last-day posts.
    year, month, day = useractivity.theLatestPostTime(posts)
    posts_week, posts_day = useractivity.extractDayWeekActivity(
        posts, year, month, day)

    df_result = useractivity.extractTimeIntervalFeatures(posts_week, posts_day)
    # Parenthesized single-argument form prints the same text under both
    # the Python 2 print statement and the Python 3 print function.
    print('extracted Q_LAST_WEEK, A_LAST_WEEK, P_NUM_LAST_WEEK')
    df_result.to_csv(DIR + 'users/temp_features.csv', index=False)

    # Per-user activity over the complete posts table.
    activities = useractivity.usersActivityFast(posts)
    df_tr = useractivity.userActivityTransform(activities)
    df_tr.to_csv(DIR + 'users/temporal_user_activities.csv', index=False)

    # Extract features describing when a question was asked.
    df = pd.read_csv(DIR + 'posts/quest_stats.csv')
    q_wknd = timefeatures.dateWeekend(df)
    q_wknd.to_csv(DIR + 'posts/quest_weekend.csv', index=False)

    extractLocs(DIR)

    # quest_stats.csv is intentionally re-read instead of reusing `df`:
    # dateWeekend received `df` and extractLocs ran in between.
    # NOTE(review): if neither mutates the frame or the file, one read suffices.
    df_tags = pd.read_csv(DIR + 'posts/quest_stats.csv')
    extractTagFeatures(DIR, df_tags)
示例#2
0
def extractUserActivityFeatures(c, DIR):
    """Dump the users-activity query to CSV and return the derived posts.

    Args:
        c: handle forwarded unchanged to ``query.usersActivity``
            (presumably a database cursor/connection - confirm with caller).
        DIR: output root; relative paths are appended by plain string
            concatenation, so it is expected to end with a path separator.

    Returns:
        The value of ``useractivity.makePosts(users_activity)`` restricted to
        rows whose ``'UserId'`` is not ``-1``.

    Side effects:
        Creates ``DIR + 'users'`` and ``DIR + 'posts'`` when missing and
        writes ``DIR + 'posts/users_activity.csv'``.
    """
    # 'users' was already ensured by the original code; 'posts' is added
    # because the CSV below is written there and to_csv does not create
    # missing parent directories. Check-then-create keeps this usable on
    # interpreters that lack os.makedirs(exist_ok=...).
    for subdir in ('users', 'posts'):
        target = DIR + subdir
        if not os.path.exists(target):
            os.makedirs(target)

    users_activity = pd.DataFrame(query.usersActivity(c),
                                  columns=header['users_activity'])
    users_activity.to_csv(DIR + 'posts/users_activity.csv', index=False)

    posts = useractivity.makePosts(users_activity)
    # Drop rows carrying the -1 user id.
    posts = posts[posts['UserId'] != -1]
    return posts