def main():
    """
  query = 'SELECT * FROM submissions WHERE manually_marked = 0 and ' \
          'manually_verified = 0 and gender IS NOT NULL and age IS NOT NULL and ' \
          'height_in IS NOT NULL and current_weight_lbs IS NOT NULL;'
  """
    submission_id = raw_input("Enter submission id to clear analysis: ")
    query = 'SELECT * FROM submissions WHERE id = "%s";' % submission_id
    # query = 'SELECT * FROM submissions;'
    m = DatabaseManager(DATABASE_PATH)
    submissions = [Submission(x) for x in m.query(query)]
    #count = 0
    #max_count = 50
    assert (len(submissions) == 1)
    for submission in submissions:
        #submission.media_json = None
        #submission.media_embed_json = None

        print "Title: ", submission.title
        print "Selftext: ", submission.self_text
        print "URL: ", submission.url
        print "Media JSON: ", submission.media_json
        # print json_str
        print "--------------------------------------------------------------------------------"

        submission.previous_weight_lbs = None
        submission.current_weight_lbs = None
        submission.age = None
        submission.height_in = None
        m.replace_submission(submission)
def main():
  """
  query = 'SELECT * FROM submissions WHERE manually_marked = 0 and ' \
          'manually_verified = 0 and gender IS NOT NULL and age IS NOT NULL and ' \
          'height_in IS NOT NULL and current_weight_lbs IS NOT NULL;'
  """
  query = 'SELECT * FROM submissions WHERE media_json IS NULL;'
  # query = 'SELECT * FROM submissions;'
  m = DatabaseManager(DATABASE_PATH)
  submissions = [Submission(x) for x in m.query(query)]
  #count = 0
  #max_count = 50
  for submission in submissions:
    #submission.media_json = None
    #submission.media_embed_json = None
    print "ID: ", submission.id
    print "Title: ", submission.title
    print "Selftext: ", submission.self_text
    print "URL: ", submission.url

    # print json_str
    try:
      Imgur.load_imgur_information_for_submission(submission)
    except:
      continue
      print "------------------------------------------------------"
    print "Media JSON: ", submission.media_json

    print "--------------------------------------------------------------------------------"

    m.replace_submission(submission)
def main():
    query = 'SELECT * FROM submissions WHERE ' \
            'gender IS NOT NULL and ' \
            'height_in IS NOT NULL and previous_weight_lbs IS NOT NULL and current_weight_lbs IS NOT NULL and media_json IS NOT NULL;'
    m = DatabaseManager(DATABASE_PATH)
    submissions = [Submission(x) for x in m.query(query)]
    json_dump_str = Submission.submission_list_to_json(submissions)
    f = open('json_dump.json', 'w')
    f.write(json_dump_str)
    print "Wrote ", len(submissions), "to  disk."
 def __update_given_submissions(self, submissions):
     """Insert new reddit-API submissions, or refresh the score of known ones.

     For each raw reddit-API submission: insert it when its id is not yet
     stored; otherwise re-read the stored row and, if the score changed,
     write the updated score back.
     """
     for submission in submissions:
         if not self.image_manager.row_exists(submission.id):
             s = Submission.from_reddit_api(submission)
             self.image_manager.insert_submission(s)
         else:
             # Row already exists: update its score if it changed.
             new_submission = Submission.from_reddit_api(submission)
             query = 'SELECT * FROM submissions WHERE id = "%s";' % submission.id
             # BUG FIX: this branch referenced an undefined name `m`
             # (NameError at runtime). The database handle in scope is
             # self.image_manager, as used in the insert branch above.
             existing_submissions = [
                 Submission(x) for x in self.image_manager.query(query)
             ]
             # id is the primary key, so exactly one row must match.
             assert (len(existing_submissions) == 1)
             existing_submission = existing_submissions[0]
             if existing_submission.score != new_submission.score:
                 existing_submission.score = new_submission.score
                 self.image_manager.replace_submission(existing_submission)
Exemplo n.º 5
0
def main():
  query = 'SELECT * FROM submissions WHERE manually_marked = 0 and ' \
          'manually_verified = 0 and gender IS NOT NULL and age IS NOT NULL and ' \
          'height_in IS NOT NULL and previous_weight_lbs IS NOT NULL and ' \
          'current_weight_lbs IS NOT NULL;'
  m = DatabaseManager(DATABASE_PATH)
  submissions = [Submission(x) for x in m.query(query)]
  for submission in submissions:
    print submission.id
    print "TITLE:", submission.title
    print "CLASSIFICATION: "
    print "gender: ", submission.gender, "age: ", submission.age, "height_in: ", submission.height_in, "previous_weight: ", submission.previous_weight_lbs, "current_weight: ", submission.current_weight_lbs
    """
    submission.gender = r.gender_is_female
      submission.age = r.age
      submission.height_in = r.height_in

      submission.previous_weight_lbs = r.previous_weight
      submission.current_weight_lbs = r.current_weight
      """
    print "----------------------------------------------------------------------------------------"
Exemplo n.º 6
0
def run_test():
    # We primarily want to classify pictures which have associated media, but do not have a classification
    # We really don't give a shit about any of the other submissions.
    submission_id = "1hncxw"
    query = 'SELECT * FROM submissions WHERE id="%s"' % submission_id
    m = DatabaseManager(DATABASE_PATH)
    # TODO: get all unique user names

    # for each user:
    # look up their submitted posts in the following subreddits

    # return [Submission(x) for x in c.fetchall()]
    all_matches = []
    submissions = [Submission(x) for x in m.query(query)]
    assert (len(submissions) == 1)
    submission = submissions[0]

    # M/28/5'7" Day 1, goal is to look as great as I feel!
    # "[MF]/\d+/\d+'\d+"

    r = RedditAnalyzer(submission.title, submission.self_text)

    # Start Print statements
    print "Title: ", submission.title
    print "Self text: ", submission.self_text
    # submission.manually_verified

    #exit()
    print
    # Later, we can work on the selftext
    #text = nltk.word_tokenize("And now for something completely different")
    #text2 = nltk.word_tokenize(submission.title)
    #print nltk.pos_tag(text2)

    print t.bold(t.red("CLASSIFICATION: " + r.get_debug_str()))
    print t.bold(
        t.red("LOW CONFIDENCE CLASSIFICATION: " + r.get_lc_debug_str()))
    print t.bold(
        t.green("Potential weights:" +
                ','.join(str(x) for x in r.potential_weights)))
Exemplo n.º 7
0
def main():
    """Reset the classification columns of every unverified media submission.

    Clears gender/age/height/weight fields on each row that is unmarked,
    unverified and has media JSON, then writes the row back.
    """
    query = ('SELECT * FROM submissions WHERE manually_marked = 0 and '
             'manually_verified = 0 and media_json NOT NULL;')
    db = DatabaseManager(DATABASE_PATH)
    for row in db.query(query):
        entry = Submission(row)
        # Null out every classification field so the row can be redone.
        for field in ('gender', 'age', 'current_weight_lbs',
                      'previous_weight_lbs', 'height_in'):
            setattr(entry, field, None)
        db.replace_submission(entry)
Exemplo n.º 8
0
def main():
  time_taken = 0
  query = 'SELECT * FROM submissions WHERE manually_marked = 0 and manually_verified = 0 and media_json NOT NULL ORDER BY score DESC;'
  m = DatabaseManager(DATABASE_PATH)

  # TODO: get all unique user names

  # for each user:
  # look up their submitted posts in the following subreddits

  # return [Submission(x) for x in c.fetchall()]
  all_matches = []
  submissions = [Submission(x) for x in m.query(query)]
  classifications = 0
  total = 0
  weight_and_height = 0
  atleast_height = 0
  previous_stats = ""
  global_start_time = time.time()
  entries_processed = 0

  for submission in submissions:
    #submission.media_json = None
    #submission.media_embed_json = None
    r = RedditAnalyzer(submission.title, submission.self_text)





    if r.has_gender() and r.has_height() and not r.has_current_weight():
      local_start_time = time.time()
      entries_processed += 1

      # TODO submission.manually_marked = 1


      print "ID: ", submission.id
      print "Title: ", t.bold(submission.title)
      print "Self text: ", submission.self_text
      print "URL: ", submission.url
      print "Score: ", submission.score

      print t.bold(t.red("CLASSIFICATION: " + r.get_debug_str()))
      print t.bold(t.red("LOW CONFIDENCE CLASSIFICATION: " + r.get_lc_debug_str()))
      # print t.bold(t.green("Potential weights:" + ','.join(str(x) for x in r.potential_weights)))


      print t.bold(t.green(previous_stats))

      print "NOTE: If current weight is skipped, nothing will be saved."

      previous_weight = raw_input('Enter previous weight: ')

      if previous_weight == "XXX":
        # This means this is a bad entry and does not have the
        # adequate weight data
        submission.manually_marked = 1
        submission.manually_verified = 1
        m.replace_submission(submission)
        continue

      current_weight = raw_input('Enter current weight: ')

      if previous_weight:
        print "Entered previous weight of: ", previous_weight

      if current_weight:
        print "Entered current weight of: ", current_weight


      if current_weight:
        submission.current_weight_lbs = int(current_weight)
        if previous_weight:
          submission.previous_weight_lbs = int(previous_weight)

        # We know that one of the low confidence or regual is set,
        # so we know the below two if statements will be successful
        # in setting the values
        if r.gender_is_female is None:
          r.gender_is_female = r.lc_gender_is_female

        if not r.height_in:
          r.height_in = r.lc_height_in



        submission.gender = r.gender_is_female
        submission.height_in = r.height_in
        if r.age:
          submission.age = r.age
        # submission.manually_verified
        submission.manually_marked = 1
        submission.manually_verified = 1



        assert(verify_submission_meets_criteria(submission))

        m.replace_submission(submission)
        # entries_processed
        local_end_time = time.time()
        previous_stats = "\n"
        previous_stats += "Entry took " + str(round((local_end_time - local_start_time), 2)) + " seconds.\n"
        previous_stats += "Rate: " + str(round((3600 / (local_end_time - local_start_time)), 2)) + " entries / hr.\n"
        entries_per_second_so_far = entries_processed / (local_end_time - global_start_time)
        previous_stats += "Ongoing Rate: " + str(round(entries_per_second_so_far * 3600, 2)) + " entries / hr."
        print previous_stats


      print
      print "---------------------------------------------------------------------"



  print title
def analyze_all_progress_pics():
    # We primarily want to classify pictures which have associated media, but do not have a classification
    # We really don't give a shit about any of the other submissions.
    query = 'SELECT * FROM submissions WHERE manually_marked = 0 and manually_verified = 0 and media_json NOT NULL;'
    m = DatabaseManager(DATABASE_PATH)

    # TODO: get all unique user names

    # for each user:
    # look up their submitted posts in the following subreddits

    # return [Submission(x) for x in c.fetchall()]
    all_matches = []
    submissions = [Submission(x) for x in m.query(query)]
    classifications = 0
    total = 0
    weight_and_height = 0
    atleast_height = 0
    for submission in submissions:
        total += 1
        # M/28/5'7" Day 1, goal is to look as great as I feel!
        # "[MF]/\d+/\d+'\d+"

        r = RedditAnalyzer(submission.title, submission.self_text)

        # print "BEFORE: ", submission.to_tuple()

        # the if statement below is what makes it primary work for progress pics
        if r.has_gender() and r.has_height() and r.has_current_weight():
            #pass
            #print "CLASSIFICATION: ", r.get_debug_str()
            #print "BEFORE: ", submission.to_tuple()

            # Either the value or the lc_value (low confidence)
            # value will be set. We want to set the submission with
            # one of those, preferably the non-lc version

            # Gender
            if r.gender_is_female is not None:
                submission.gender = r.gender_is_female
            else:
                submission.gender = r.lc_gender_is_female

            # Height
            if r.height_in is not None:
                submission.height_in = r.height_in
            else:
                submission.height_in = r.lc_height_in

            # Current Weight
            if r.current_weight is not None:
                submission.current_weight_lbs = r.current_weight
            else:
                submission.current_weight_lbs = r.lc_current_weight

            # Previous Weight
            if r.previous_weight is not None:
                submission.previous_weight_lbs = r.previous_weight
            else:
                submission.previous_weight_lbs = r.lc_previous_weight

            m.replace_submission(submission)
            # print "AFTER: ", submission.to_tuple()
            classifications += 1

        if r.has_current_weight() and r.has_height() and r.get_debug_str():
            weight_and_height += 1

        if r.has_height():
            atleast_height += 1

        # Start Print statements
        if r.has_gender() and r.has_height() and not r.has_current_weight():
            print "Title: ", submission.title
            print "Self text: ", submission.self_text
            # submission.manually_verified

            #exit()
            print
            # Later, we can work on the selftext
            #text = nltk.word_tokenize("And now for something completely different")
            #text2 = nltk.word_tokenize(submission.title)
            #print nltk.pos_tag(text2)

            print t.bold(t.red("CLASSIFICATION: " + r.get_debug_str()))
            print t.bold(
                t.red("LOW CONFIDENCE CLASSIFICATION: " +
                      r.get_lc_debug_str()))
            print t.bold(
                t.green("Potential weights:" +
                        ','.join(str(x) for x in r.potential_weights)))

        # End Print statements

        print "---------------------------------------------------------------------"
        #exit()
    print "stats:"
    print HITS_stats
    print "classifications: ", classifications, " out of a total: ", total
    print "Only weight and height: ", weight_and_height
    print "Atleast height: ", atleast_height