Exemplo n.º 1
0
def crawlusermeta():
    """Pull metadata for every row returned by ``DbInteract.get_unique_users``.

    Each row is wrapped in a ``User`` and ``pull_metadata`` is invoked on it.
    The fetched rows are echoed to stdout first, and a confirmation line is
    printed once every user has been processed.
    """
    user_rows = DbInteract().get_unique_users()
    print(user_rows)
    for user_row in user_rows:
        User(user_row).pull_metadata()

    print("User metadata refreshed")
Exemplo n.º 2
0
    def crawl(self):
        """Record this redditor's comments and submissions as activity rows.

        Every comment and every submission listed under ``new`` for the
        redditor identified by ``self.id`` is turned into an ``Activity`` and
        the whole batch is handed to ``DbInteract.add_activity_records``.
        """
        reddit = Reddit()
        store = DbInteract()
        redditor = reddit.conn.redditor(self.id)

        def as_iso(epoch_seconds):
            # NOTE(review): fromtimestamp() without a tz yields local time even
            # though the source field is named created_utc -- confirm intended.
            return datetime.datetime.fromtimestamp(epoch_seconds).isoformat()

        # Listings are fully materialised before any record is built.
        comments = list(redditor.comments.new(limit=None))
        records = [
            Activity(c.subreddit_id, c.author.id, 'comment',
                     as_iso(c.created_utc))
            for c in comments
        ]

        submissions = list(redditor.submissions.new(limit=None))
        records.extend(
            Activity(s.subreddit.id, s.author.id, 'submission',
                     as_iso(s.created_utc))
            for s in submissions
        )

        store.add_activity_records(records)
Exemplo n.º 3
0
  def read_lines(self, activity_file, act_type):
    """Parse a comma-separated activity dump and store its rows.

    The first line of ``activity_file`` is the header naming the columns;
    every later line is one record. Lines may be ``str`` or UTF-8 encoded
    ``bytes``. Rows whose field count differs from the header, and rows whose
    author is "[deleted]", are skipped. The collected tuples
    ``(page_id, subreddit, author, type, created_utc)`` are passed to
    ``DbInteract.add_activity_records`` in one call.

    Args:
      activity_file: Iterable of lines (str or bytes), header line first.
      act_type: 'comment' reads the page id from the "link_id" column and
        records type 'comment'; any other value reads it from "id" and
        records type 'link'.
    """
    db = DbInteract()
    activity = []

    is_comment = act_type == 'comment'
    page_id_field = "link_id" if is_comment else "id"
    type_label = 'comment' if is_comment else 'link'

    field_list = None
    # Column positions depend only on the header, so they are resolved once
    # (on the first well-formed data row) instead of once per row.
    columns = None

    for line_number, line in enumerate(activity_file):
      text = line if isinstance(line, str) else line.decode('utf-8')
      # Drop the line terminator so the last column does not carry a stray
      # "\n"/"\r\n" (which would also defeat the "[deleted]" check below).
      fields = text.rstrip("\r\n").split(",")

      if line_number == 0:
        field_list = fields
        continue

      if len(field_list) != len(fields):
        continue

      if columns is None:
        columns = (
          self.index_by_field_name("subreddit", field_list),
          self.index_by_field_name("author", field_list),
          self.index_by_field_name("created_utc", field_list),
          self.index_by_field_name(page_id_field, field_list),
        )
      subreddit_index, user_id_index, created_utc_index, page_id_index = columns

      if fields[user_id_index] == "[deleted]":
        continue

      page_id = fields[page_id_index]
      if is_comment:
        # Keep only the last six characters of link_id
        # (presumably drops a type prefix such as "t3_" -- TODO confirm).
        page_id = page_id[-6:]

      activity.append((page_id, fields[subreddit_index], fields[user_id_index],
                       type_label, fields[created_utc_index]))

    db.add_activity_records(activity)
Exemplo n.º 4
0
    def pull_metadata(self):
        """Fetch and store registration/first-activity metadata for this user.

        Does nothing if ``DbInteract.user_metadata_exists`` already reports a
        record for ``self.id``. Otherwise walks the redditor's ``new`` comment
        and submission listings, finds the oldest ``created_utc`` among them
        and saves ``(id, max_activity, registration_time, oldest, delay)``
        where ``delay = oldest - registration_time``.

        ``max_activity`` is True when either listing returned exactly 1000
        items (presumably the API listing cap, meaning older activity may be
        missing -- TODO confirm).

        Users with no comments and no submissions have no "oldest" timestamp;
        they are reported on stdout and nothing is saved. Any exception raised
        while talking to Reddit or the database is printed and swallowed so a
        bulk crawl can continue with the next user.
        """
        db = DbInteract()
        r = Reddit()
        try:
            print(self.id)
            if db.user_metadata_exists((self.id, )):
                return

            redditor = r.conn.redditor(self.id)
            registration_time = redditor.created_utc

            oldest = None
            counts = []
            # Comments first, then submissions: count each listing and track
            # the earliest creation time seen across both.
            for listing in (redditor.comments, redditor.submissions):
                count = 0
                for item in listing.new(limit=None):
                    count += 1
                    if oldest is None or item.created_utc < oldest:
                        oldest = item.created_utc
                counts.append(count)
            comment_count, submission_count = counts

            if oldest is None:
                # No activity at all: the delay cannot be computed, so skip
                # saving instead of failing on None - float.
                print("No activity found; metadata not saved")
                return

            max_activity = comment_count == 1000 or submission_count == 1000

            db.save_user_metadata((self.id, max_activity, registration_time,
                                   oldest, oldest - registration_time))
            print("Saved")
        except Exception as e:
            # Deliberate best-effort: report and move on.
            print(e)
Exemplo n.º 5
0
def delaypercentile(user_id):
    """Print the delay percentile computed by the database for ``user_id``."""
    percentile = DbInteract().calculate_delay_percentile(user_id)
    print("User is in the", percentile, "percentile of user delays.")
Exemplo n.º 6
0
def activitypercentile(user_id):
    """Print the activity percentile computed by the database for ``user_id``."""
    percentile = DbInteract().calculate_activity_percentile(user_id)
    print("User is in the", percentile, "percentile of user activity.")
Exemplo n.º 7
0
 def refresh():
     """Ask the database layer to refresh the relationship graph."""
     DbInteract().refresh_relationship_graph()
Exemplo n.º 8
0
 def refresh():
     """Ask the database layer to refresh the action graph."""
     DbInteract().refresh_action_graph()