示例#1
0
def parse_answer(answer):
    """Fetch one answer page, register its credible upvoters, refresh stats.

    Args:
        answer: Answer row. ``question`` and ``writer_uname`` are used to
            build the page URL; ``views``, ``upvotes`` and ``last_parsed``
            are overwritten and the row is saved.
    """
    def stat_value(tag_name, css_class):
        # The figure lives in a <strong> child of the matched element;
        # commas are stripped before the int conversion.
        holder = page.find(tag_name, class_=css_class)
        return int(holder.strong.string.replace(',', ''))

    # Download the page and parse only the part selected by ANSWER_STRAIN.
    page_url = ANSWER_URL.format(answer.question, answer.writer_uname)
    html = get_page(page_url)
    page = BeautifulSoup(html, 'html.parser', parse_only=ANSWER_STRAIN)

    # Every credible user who upvoted this answer gets a Profile row.
    credibility_box = page.find('div', class_=CREDIBILITY_CLASS)
    upvoters = credibility_box.find_all('a', class_='user')
    for link in upvoters:
        Profile.create_or_get(uname=link['href'].split('/')[2],
                              name=link.string)

    # Refresh the stored statistics.
    answer.views = stat_value('div', VIEW_ROW_CLASS)
    answer.upvotes = stat_value('a', UPVOTE_ROW_CLASS)
    answer.last_parsed = datetime.datetime.now()
    answer.save()

    # The raw HTML is deliberately not written to disk: storing every
    # answer is not feasible.

    progress = '\rDone Parsing Answer id %d (%d)' % (answer.id, len(upvoters))
    sys.stdout.write(progress)
    sys.stdout.flush()
def parse_answer(answer):
  """Fetch one answer page, register its upvoters, refresh stats, keep HTML.

  Args:
    answer: Answer row. ``question`` and ``writer_uname`` are used to build
      the page URL; ``views``, ``upvotes`` and ``last_parsed`` are
      overwritten and the row is saved. ``id`` names the saved HTML file.
  """
  def stat_value(tag_name, css_class):
    # The figure lives in a <strong> child of the matched element; commas
    # are stripped before the int conversion.
    holder = page.find(tag_name, class_=css_class)
    return int(holder.strong.string.replace(',', ''))

  # Download the page and parse only the part selected by ANSWER_STRAIN.
  page_url = ANSWER_URL.format(answer.question, answer.writer_uname)
  html = get_page(page_url)
  page = BeautifulSoup(html, 'html.parser', parse_only=ANSWER_STRAIN)

  # Every credible user who upvoted this answer gets a Profile row.
  credibility_box = page.find('div', class_=CREDIBILITY_CLASS)
  upvoters = credibility_box.find_all('a', class_='user')
  for link in upvoters:
    Profile.create_or_get(uname=link['href'].split('/')[2], name=link.string)

  # Refresh the stored statistics.
  answer.views = stat_value('div', VIEW_ROW_CLASS)
  answer.upvotes = stat_value('a', UPVOTE_ROW_CLASS)
  answer.last_parsed = datetime.datetime.now()
  answer.save()

  # Keep a copy of the downloaded HTML, named after the answer id.
  target = os.path.join(ANSWERS_FOLDER, str(answer.id) + '.html')
  with open(target, 'w+') as fstream:
    fstream.write(html)

  progress = '\rDone Parsing Answer id %d (%d)' % (answer.id, len(upvoters))
  sys.stdout.write(progress)
  sys.stdout.flush()
示例#3
0
def profile_with_max_views(limit=20):
    """Print the profiles with the highest view counts, best first.

    Args:
        limit: Maximum number of profiles to list (default 20).
    """
    profiles = Profile.select().order_by(Profile.views.desc()).limit(limit)

    # The heading previously read "most answers" although the query orders
    # by views and every row prints a view count.
    print("Top %d Writers with most views - " % limit)
    for profile in profiles:
        u = PROFILE_URL.format(profile.uname)
        print("{0} ({1} views)".format(u, profile.views))
示例#4
0
def profile_with_max_followers(limit=20):
    """Print the most followed profiles, best first.

    Args:
        limit: Maximum number of profiles to list (default 20).
    """
    by_followers = Profile.select().order_by(Profile.followers.desc())
    ranked = by_followers.limit(limit)

    print("Top %d Most followed - " % limit)
    row_template = "{0} ({1} followers)"
    for entry in ranked:
        link = PROFILE_URL.format(entry.uname)
        print(row_template.format(link, entry.followers))
示例#5
0
def profile_with_max_followers(limit=20):
  """Print the most followed profiles, best first.

  Args:
    limit: Maximum number of profiles to list (default 20).
  """
  by_followers = Profile.select().order_by(Profile.followers.desc())
  ranked = by_followers.limit(limit)

  print('Top %d Most followed - ' % limit)
  row_template = '{0} ({1} followers)'
  for entry in ranked:
    link = PROFILE_URL.format(entry.uname)
    print(row_template.format(link, entry.followers))
示例#6
0
def profile_with_max_views(limit=20):
  """Print the profiles with the highest view counts, best first.

  Args:
    limit: Maximum number of profiles to list (default 20).
  """
  profiles = Profile.select().order_by(Profile.views.desc()).limit(limit)

  # The heading previously read "most answers" although the query orders by
  # views and every row prints a view count.
  print('Top %d Writers with most views - ' % limit)
  for profile in profiles:
    u = PROFILE_URL.format(profile.uname)
    print('{0} ({1} views)'.format(u, profile.views))
示例#7
0
def answers_with_max_upvotes(limit=20):
  """Print statistics for the most upvoted answers and plot views vs upvotes.

  Lists the answers ordered by upvotes, prints totals and averages, counts
  how many of their writers appear in top_writers_2016.json, prints follower
  and answer averages for those writers, and saves a scatter plot to
  view_upvote.png.

  Args:
    limit: Maximum number of answers to include (default 20).
  """
  # Materialise the rows once: they are counted, iterated and plotted below.
  answers = list(
    Answer.select().order_by(Answer.upvotes.desc()).limit(limit))

  print('Top %d Most upvoted answers - ' % limit)
  if not answers:
    # Without rows every average below would divide by zero.
    print("No answers found")
    return

  total_views = 0
  total_upvotes = 0
  writer = {}  # writer uname -> number of listed answers by that writer
  for answer in answers:
    u = ANSWER_URL.format(answer.question, answer.writer_uname).encode("UTF-8")
    # An answer without upvotes would otherwise raise ZeroDivisionError.
    ratio = answer.views / answer.upvotes if answer.upvotes else 0
    print("{0} ({1}, {2}, {3})".format(u, answer.upvotes, answer.views,
                                       ratio))
    total_upvotes += answer.upvotes
    total_views += answer.views
    writer[answer.writer_uname] = writer.get(answer.writer_uname, 0) + 1

  # Average over the rows actually returned; the table may hold fewer than
  # `limit` answers, in which case dividing by `limit` understates the mean.
  count = len(answers)
  print("Total Views = {0}".format(total_views))
  print("Total Upvotes = {0}".format(total_upvotes))
  print("Average Views = {0}".format(total_views / count))
  print("Average Upvotes = {0}".format(total_upvotes / count))
  avg_up = (float(total_upvotes) / total_views) * 100 if total_views else 0.0
  print("On an average %.2f viewers upvoted the answer" % avg_up)

  # Writer Stat
  with open('top_writers_2016.json', 'r') as fstream:
    writer_list = json.load(fstream)
  notw = sum(1 for w in writer_list if w['uname'] in writer)
  print("{0} People on this list are Top Writers(2016)".format(notw))
  sorted_writer = sorted(writer.items(), key=operator.itemgetter(1),
                         reverse=True)
  print("Total number of unique writers is {0}".format(len(sorted_writer)))
  total_followers = 0
  total_answers = 0
  for uname, _ in sorted_writer:
    profile = Profile.get(Profile.uname == uname)
    total_followers += int(profile.followers)
    total_answers += int(profile.total_answers)
  # sorted_writer is non-empty here because answers is non-empty.
  print("Average number of followers of each {0}".format(
    total_followers / len(sorted_writer)))
  print("Average number of answers written by each is {}".format(
    total_answers / len(sorted_writer)))

  # Plotting Graph
  figure = mpplt.figure(figsize=(10, 10))
  axes = figure.add_subplot(1, 1, 1)
  axes.set_title("Views vs Upvote")
  axes.plot([answer.views for answer in answers],
            [answer.upvotes for answer in answers],
            '.', color='green')
  axes.set_xlabel('Views')
  axes.ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
  axes.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
  axes.set_xlim([0, 1500000])
  axes.set_ylim([10000, 25000])
  axes.set_ylabel('Upvotes')
  figure.savefig('view_upvote.png', facecolor='white', edgecolor='black')
示例#8
0
def answers_with_max_upvotes(limit=20):
    """Print statistics for the most upvoted answers and plot views vs upvotes.

    Lists the answers ordered by upvotes, prints totals and averages, counts
    how many of their writers appear in top_writers_2016.json, prints
    follower and answer averages for those writers, and saves a scatter plot
    to view_upvote.png.

    Args:
        limit: Maximum number of answers to include (default 20).
    """
    # Materialise the rows once: they are counted, iterated and plotted below.
    answers = list(
        Answer.select().order_by(Answer.upvotes.desc()).limit(limit))

    print("Top %d Most upvoted answers - " % limit)
    if not answers:
        # Without rows every average below would divide by zero.
        print("No answers found")
        return

    total_views = 0
    total_upvotes = 0
    writer = {}  # writer uname -> number of listed answers by that writer
    for answer in answers:
        u = ANSWER_URL.format(answer.question,
                              answer.writer_uname).encode("UTF-8")
        # An answer without upvotes would otherwise raise ZeroDivisionError.
        ratio = answer.views / answer.upvotes if answer.upvotes else 0
        print("{0} ({1}, {2}, {3})".format(u, answer.upvotes, answer.views,
                                           ratio))
        total_upvotes += answer.upvotes
        total_views += answer.views
        writer[answer.writer_uname] = writer.get(answer.writer_uname, 0) + 1

    # Average over the rows actually returned; the table may hold fewer than
    # `limit` answers, in which case dividing by `limit` understates the mean.
    count = len(answers)
    print("Total Views = {0}".format(total_views))
    print("Total Upvotes = {0}".format(total_upvotes))
    print("Average Views = {0}".format(total_views / count))
    print("Average Upvotes = {0}".format(total_upvotes / count))
    if total_views:
        avg_up = (float(total_upvotes) / total_views) * 100
    else:
        avg_up = 0.0
    print("On an average %.2f viewers upvoted the answer" % avg_up)

    # Writer Stat
    with open("top_writers_2016.json", "r") as fstream:
        writer_list = json.load(fstream)
    notw = sum(1 for w in writer_list if w["uname"] in writer)
    print("{0} People on this list are Top Writers(2016)".format(notw))
    sorted_writer = sorted(writer.items(), key=operator.itemgetter(1),
                           reverse=True)
    print("Total number of unique writers is {0}".format(len(sorted_writer)))
    total_followers = 0
    total_answers = 0
    for uname, _ in sorted_writer:
        profile = Profile.get(Profile.uname == uname)
        total_followers += int(profile.followers)
        total_answers += int(profile.total_answers)
    # sorted_writer is non-empty here because answers is non-empty.
    print("Average number of followers of each {0}".format(
        total_followers / len(sorted_writer)))
    print("Average number of answers written by each is {}".format(
        total_answers / len(sorted_writer)))

    # Plotting Graph
    figure = mpplt.figure(figsize=(10, 10))
    axes = figure.add_subplot(1, 1, 1)
    axes.set_title("Views vs Upvote")
    axes.plot([answer.views for answer in answers],
              [answer.upvotes for answer in answers],
              ".", color="green")
    axes.set_xlabel("Views")
    axes.ticklabel_format(style="sci", axis="x", scilimits=(0, 0))
    axes.ticklabel_format(style="sci", axis="y", scilimits=(0, 0))
    axes.set_xlim([0, 1500000])
    axes.set_ylim([10000, 25000])
    axes.set_ylabel("Upvotes")
    figure.savefig("view_upvote.png", facecolor="white", edgecolor="black")
示例#9
0
def gateway():
    """Serve the combined sign-up / log-in page.

    A logged-in visitor is redirected to the index. On POST the submission is
    first treated as a registration; if anything in that attempt raises (for
    example because ``username`` or ``confirm`` is absent from the form), the
    submission is handled as a log-in instead. Any other outcome re-renders
    gateway.html with an error message.

    Returns:
        A redirect to the index after a successful log-in (or when already
        logged in), otherwise the rendered gateway.html template.
    """
    if is_logged_in():
        return redirect(url_for("index"))
    err = ""
    if request.method == "POST":
        try:
            # Raw string: "\." is not a valid escape sequence in a normal
            # literal (the value passed to fix() is unchanged).
            email = fix(request.form["email"], r"@\.")
            username = fix(request.form["username"])
            password = fix(request.form["password"])
            confirm = fix(request.form["confirm"])
            # Checks run in the same order as before; the first failure
            # decides the message.
            if not validate_email(email):
                err = "Invalid Email Entered"
            elif Profile.already_exists(email=email):
                err = "Email Already Exists"
            elif not validate_username(username):
                err = "Invalid Username Entered"
            elif Profile.already_exists(username=username):
                err = "Username Already Exists"
            elif password != confirm or password == "":
                err = "Invalid Passwords Entered"
            else:
                Profile.insert_profile(email, username,
                                       hash_password(password))
                return render_template("gateway.html",
                                       title="Gateway")
        except Exception:
            # NOTE(review): this broad handler is what routes log-in
            # submissions here, but it also hides unrelated failures raised
            # during registration. Presumably only the missing-form-key case
            # is intended -- confirm before narrowing it.
            email = fix(request.form["email"], r"@\.")
            password = fix(request.form["password"])
            if email is None or password is None:
                err = "Fill in all blanks"
            elif not Profile.already_exists(email=email):
                err = "Incorrect Email"
            else:
                expected_pass = Profile.get_password_hash(email)
                # Only the salt part is needed to re-hash the attempt.
                salt, _ = str(expected_pass).split("$")
                if expected_pass == str(hash_password(password, salt)):
                    session["email"] = email
                    session["username"] = Profile.get("username",
                                                      email=email)
                    return redirect(url_for("index"))
                err = "Incorrect Email or Password"

    reg = request.args.get("reg") == "true"
    return render_template("gateway.html",
                           title="Gateway",
                           indent=False,
                           reg=reg,
                           err=err)
示例#10
0
def profile_with_max_edits(limit=20):
  """Print and chart the profiles with the most edits.

  Prints one line per profile, the total and average number of edits and how
  many of the listed users appear in top_writers_2016.json, then saves a bar
  chart of the edit counts.

  Args:
    limit: Maximum number of profiles to include (default 20).
  """
  # Materialise once: the rows are iterated, counted and charted below.
  profiles = list(
    Profile.select().order_by(Profile.edits.desc()).limit(limit))
  if not profiles:
    # The average below would divide by zero.
    print("No profiles found")
    return

  # Top Writer
  with open('top_writers_2016.json', 'r') as fstream:
    tw = set(writer['uname'] for writer in json.load(fstream))
  top_writer_count = 0
  total_edits = 0

  for profile in profiles:
    u = PROFILE_URL.format(profile.uname)
    # Report the edit count: the query and the chart use `edits`, but the
    # printed numbers and the total previously came from `posts`.
    print(u'{0} ({1} edits)'.format(u, profile.edits))
    total_edits += profile.edits
    if profile.uname in tw:
      top_writer_count += 1

  avg_edits = total_edits / len(profiles)
  print("Total number of edits is %d" % total_edits)
  print("Average number of edits by a single user is %d" % avg_edits)
  print("Number on Top Writers 2016 on this list is %d" % top_writer_count)

  # Visualisation
  fig = plt.figure(figsize=(21, 14))
  plt.title("Users with Most Edits on Quora")
  ydata = [prof.edits for prof in profiles]
  xticks = [prof.name for prof in profiles]
  plt.xlabel('User')
  plt.ylabel('Edits Made by User')
  plt.bar(range(len(profiles)), ydata, align='center')
  plt.xticks(range(len(profiles)), xticks, rotation=90)
  plt.hlines(avg_edits, -0.5, len(profiles) - 0.5, label='Average',
             colors='red')
  plt.xlim([-0.5, len(profiles) - 0.5])
  fig.tight_layout()
  # NOTE(review): the output name still says "posts"; kept unchanged so
  # existing consumers of the file are not broken -- confirm and rename.
  plt.savefig('most_posts.png', facecolor='white', edgecolor='black')
示例#11
0
                        action='store_true',
                        help='Do not Crawl Profiles')
    parser.add_argument('--no_answer',
                        action='store_true',
                        help='Do not Crawl Answers')
    args = parser.parse_args()

    # Filling Database with Top Writers 2016
    with open('top_writers_2016.json', 'r') as fstream:
        writer_list = json.load(fstream)
    with open('other_writers.json', 'r') as fstream:
        writer_list += json.load(fstream)
    create_directory(ANSWERS_FOLDER)
    create_directory(PROFILE_FOLDER)
    for writer in writer_list:
        new = Profile.create_or_get(uname=writer['uname'],
                                    name=writer['name'])[1]
        if new: print(u'New Profile %s Created' % writer['uname'])
    #print "Number of Writers Added = ", len(writer_list)

    # Starting to Crawl
    total_parsing = 0
    max_crawl = args.max_crawl
    while total_parsing < max_crawl:
        if not args.no_profile:
            # Parse Old Profiles
            old_time = datetime.datetime.now() - datetime.timedelta(days=7)
            old_profiles = Profile.select().where(
                Profile.last_parsed <= old_time).limit(max_crawl -
                                                       total_parsing)
            total_parsing += len(old_profiles)
            print "Number of Profiles to Crawl - ", len(old_profiles)
                      help='Number of maximum requests to make')
  parser.add_argument('--no_profile', action='store_true',
                      help='Do not Crawl Profiles')
  parser.add_argument('--no_answer', action='store_true',
                      help='Do not Crawl Answers')
  args = parser.parse_args()

  # Filling Database with Top Writers 2016
  with open('top_writers_2016.json', 'r') as fstream:
    writer_list = json.load(fstream)
  with open('other_writers.json', 'r') as fstream:
    writer_list += json.load(fstream)
  create_directory(ANSWERS_FOLDER)
  create_directory(PROFILE_FOLDER)
  for writer in writer_list:
    new = Profile.create_or_get(uname=writer['uname'], name=writer['name'])[1]
    if new: print(u'New Profile %s Created' % writer['uname'])
  #print "Number of Writers Added = ", len(writer_list)

  # Starting to Crawl
  total_parsing = 0
  max_crawl = args.max_crawl
  while total_parsing < max_crawl:
    if not args.no_profile:
      # Parse Old Profiles
      old_time = datetime.datetime.now() - datetime.timedelta(days=7)
      old_profiles = Profile.select().where(
        Profile.last_parsed <= old_time).limit(max_crawl - total_parsing)
      total_parsing += len(old_profiles)
      print "Number of Profiles to Crawl - ", len(old_profiles)
      for profile in old_profiles:
示例#13
0
def profile_with_max_views(limit=200):
  """Print and chart the profiles with the highest all-time views.

  Prints one line per profile, then saves two bar charts:
  views_per_follower.png (views divided by followers for every listed
  profile) and all_time_views.png (raw views, leaving out the two most
  viewed profiles).

  Args:
    limit: Maximum number of profiles to include (default 200).
  """
  # Materialise once: the rows are iterated, indexed and sliced below.
  profiles = list(
    Profile.select().order_by(Profile.views.desc()).limit(limit))

  # The heading previously read "most answers" although the listing is
  # ordered by, and prints, views.
  print('Top %d Writers with most views - ' % limit)
  total_views = 0
  total_followers = 0
  for profile in profiles:
    u = PROFILE_URL.format(profile.uname)
    print(u'{0} ({1})'.format(u, profile.views))
    # Each profile is counted exactly once. The earlier loop added the views
    # twice per profile, which doubled both average lines on the charts.
    total_views += profile.views
    total_followers += profile.followers

  if not profiles:
    return

  # Per Follower Plot. Float division so the average line is on the same
  # scale as the (float) bar heights; zero followers yield 0 rather than a
  # ZeroDivisionError.
  if total_followers:
    average_per_f = float(total_views) / total_followers
  else:
    average_per_f = 0.0
  per_follower = [float(prof.views) / prof.followers if prof.followers else 0.0
                  for prof in profiles]
  _save_views_bar_chart(profiles, per_follower, average_per_f,
                        "Views / Follower for Most Viewed Writers",
                        'Views per Follower', 'views_per_follower.png')

  # The all-time chart leaves out the two most viewed profiles (presumably
  # outliers that would dwarf the other bars -- TODO confirm), so it needs
  # at least three rows.
  if len(profiles) < 3:
    return
  total_views -= profiles[0].views + profiles[1].views
  profiles = profiles[2:]
  average_views = total_views / len(profiles)
  _save_views_bar_chart(profiles, [prof.views for prof in profiles],
                        average_views, "All-time Views",
                        'All Time Answer Views', 'all_time_views.png')


def _save_views_bar_chart(profiles, values, average, title, ylabel, filename):
  """Save a bar chart with one bar per profile and a red average line."""
  fig = plt.figure(figsize=(21, 14))
  plt.title(title)
  plt.xlabel('Profile')
  plt.ylabel(ylabel)
  plt.bar(range(len(profiles)), values, align='center')
  plt.xticks(range(len(profiles)), [prof.name for prof in profiles],
             rotation=90)
  plt.hlines(average, -0.5, len(profiles) - 0.5, label='Average',
             colors='red')
  plt.xlim([-0.5, len(profiles) - 0.5])
  fig.tight_layout()
  plt.savefig(filename, facecolor='white', edgecolor='black')
示例#14
0
def profile_with_max_answers(limit=20):
  """Print and chart the profiles that have written the most answers.

  Prints one line per profile plus totals and averages, then saves two bar
  charts: most_answers.png (answers per profile) and view_avg_most_ans.png
  (views per answer for each profile).

  Args:
    limit: Maximum number of profiles to include (default 20).
  """
  def draw_chart(title, ylabel, height_of, average, average_label, filename):
    # One bar per profile plus a red horizontal line marking the average.
    fig = plt.figure(figsize=(21, 14))
    plt.title(title)
    heights = [height_of(prof) for prof in ranked]
    names = [prof.name for prof in ranked]
    plt.xlabel('Profile')
    plt.ylabel(ylabel)
    plt.bar(range(len(ranked)), heights, align='center')
    plt.xticks(range(len(ranked)), names, rotation=90)
    plt.hlines(average, -0.5, len(ranked) - 0.5,
               label=average_label, colors='red')
    plt.xlim([-0.5, len(ranked) - 0.5])
    plt.legend()
    fig.tight_layout()
    plt.savefig(filename, facecolor='white', edgecolor='black')

  by_answers = Profile.select().order_by(Profile.total_answers.desc())
  ranked = by_answers.limit(limit)

  answer_sum = 0
  view_sum = 0

  # Names of the 2016 top writers, used only for membership checks.
  with open('top_writers_2016.json', 'r') as fstream:
    top_writer_names = {entry['uname']: 0 for entry in json.load(fstream)}
  top_writers_seen = 0

  print('Top %d Writers with most answers - ' % limit)
  for prof in ranked:
    link = PROFILE_URL.format(prof.uname)
    print(u'{0} ({1} answers)'.format(link, prof.total_answers))
    answer_sum += prof.total_answers
    view_sum += prof.views
    if prof.uname in top_writer_names:
      top_writers_seen += 1

  views_per_answer = view_sum / answer_sum
  views_per_user = view_sum / len(ranked)
  answers_per_user = answer_sum / len(ranked)

  print("Total number of answers is %d" % answer_sum)
  print("Average number of answers per user is %d" % answers_per_user)
  print("Average number of views per answer is %d" % views_per_answer)
  print("Average all time views for a user is %d" % views_per_user)
  print("Number of Top Writers 2016 in this list is %d" % top_writers_seen)

  draw_chart("Users with most answers on Quora",
             'Number of answers written',
             lambda prof: prof.total_answers,
             answers_per_user,
             'Average Number of Answers',
             'most_answers.png')

  draw_chart(
    "Average views on each answer for users with most answers on Quora",
    'All time views on answers / Number of answers',
    lambda prof: float(prof.views) / prof.total_answers,
    views_per_answer,
    'Average Number of views / answer',
    'view_avg_most_ans.png')
def answers_with_max_upvotes(limit=20):
  """Print statistics for the most upvoted answers and plot views vs upvotes.

  Lists the answers ordered by upvotes (flagging each new running maximum of
  views), prints totals and averages, counts how many of their writers appear
  in top_writers_2016.json, prints follower and answer averages for those
  writers, and saves a scatter plot to view_upvote.png.

  Args:
    limit: Maximum number of answers to include (default 20).
  """
  # Materialise the rows once: they are counted, iterated and plotted below.
  answers = list(
    Answer.select().order_by(Answer.upvotes.desc()).limit(limit))

  print('Top %d Most upvoted answers - ' % limit)
  if not answers:
    # Without rows every average below would divide by zero.
    print("No answers found")
    return

  total_views = 0
  total_upvotes = 0
  writer = {}  # writer uname -> number of listed answers by that writer
  max_views = 0
  for answer in answers:
    u = ANSWER_URL.format(answer.question, answer.writer_uname).encode("UTF-8")
    # An answer without upvotes would otherwise raise ZeroDivisionError.
    ratio = answer.views / answer.upvotes if answer.upvotes else 0
    print("{0} ({1}, {2}, {3})".format(u, answer.upvotes, answer.views,
                                       ratio))
    total_upvotes += answer.upvotes
    total_views += answer.views
    writer[answer.writer_uname] = writer.get(answer.writer_uname, 0) + 1
    if answer.views > max_views:
      max_views = answer.views
      # Single-argument print() behaves the same as a statement and as a
      # function; the text matches the former `print "Max - ", u, views`.
      print("Max -  {0} {1}".format(u, answer.views))

  # Average over the rows actually returned; the table may hold fewer than
  # `limit` answers, in which case dividing by `limit` understates the mean.
  count = len(answers)
  print("Total Views = {0}".format(total_views))
  print("Total Upvotes = {0}".format(total_upvotes))
  print("Average Views = {0}".format(total_views / count))
  print("Average Upvotes = {0}".format(total_upvotes / count))
  avg_up = (float(total_upvotes) / total_views) * 100 if total_views else 0.0
  print("On an average %.2f viewers upvoted the answer" % avg_up)

  # Writer Stat
  with open('top_writers_2016.json', 'r') as fstream:
    writer_list = json.load(fstream)
  notw = sum(1 for w in writer_list if w['uname'] in writer)
  print("{0} People on this list are Top Writers(2016)".format(notw))
  sorted_writer = sorted(writer.items(), key=operator.itemgetter(1),
                         reverse=True)
  print(sorted_writer[:10])
  print("Total number of unique writers is {0}".format(len(sorted_writer)))
  total_followers = 0
  total_answers = 0
  for uname, _ in sorted_writer:
    profile = Profile.get(Profile.uname == uname)
    total_followers += int(profile.followers)
    total_answers += int(profile.total_answers)
  # sorted_writer is non-empty here because answers is non-empty.
  print("Average number of followers of each {0}".format(
    total_followers / len(sorted_writer)))
  print("Average number of answers written by each is {}".format(
    total_answers / len(sorted_writer)))

  # Plotting Graph
  figure = plt.figure(figsize=(10, 10))
  splt = figure.add_subplot(1, 1, 1)
  splt.set_title("Views vs Upvote")
  splt.plot([answer.views for answer in answers],
            [answer.upvotes for answer in answers],
            '.', color='green')
  splt.set_xlabel('Views')
  splt.ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
  splt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
  splt.set_xlim([0, 1500000])
  splt.set_ylim([10000, 25000])
  splt.set_ylabel('Upvotes')
  figure.tight_layout()
  figure.savefig('view_upvote.png', facecolor='white', edgecolor='black')
示例#16
0
def profile(username):
    """Render the profile page for ``username``.

    Unknown usernames are redirected to the index page.
    """
    if Profile.already_exists(username=username):
        page_title = "%s's Profile" % username
        return render_template("profile.html",
                               title=page_title,
                               username=username)
    return redirect(url_for("index"))
示例#17
0
    "https://s3.amazonaws.com/uifaces/faces/twitter/langate/128.jpg",
    "https://s3.amazonaws.com/uifaces/faces/twitter/anoff/128.jpg",
    "https://s3.amazonaws.com/uifaces/faces/twitter/osvaldas/128.jpg",
    "https://s3.amazonaws.com/uifaces/faces/twitter/jayrobinson/128.jpg",
    "https://s3.amazonaws.com/uifaces/faces/twitter/jm_denis/128.jpg",
    "https://s3.amazonaws.com/uifaces/faces/twitter/brandclay/128.jpg",
    "https://s3.amazonaws.com/uifaces/faces/twitter/VinThomas/128.jpg",
    "https://s3.amazonaws.com/uifaces/faces/twitter/_victa/128.jpg",
    "https://s3.amazonaws.com/uifaces/faces/twitter/saschamt/128.jpg",
    "https://s3.amazonaws.com/uifaces/faces/twitter/victorerixon/128.jpg",
    "https://s3.amazonaws.com/uifaces/faces/twitter/noxdzine/128.jpg",
    "https://s3.amazonaws.com/uifaces/faces/twitter/yalozhkin/128.jpg",
    "https://s3.amazonaws.com/uifaces/faces/twitter/flame_kaizar/128.jpg",
    "https://s3.amazonaws.com/uifaces/faces/twitter/baliomega/128.jpg",
    "https://s3.amazonaws.com/uifaces/faces/twitter/terryxlife/128.jpg"
]

# Seed the database with sample accounts: one User (with a single attached
# Profile) per combination of a letter from the string below and an entry of
# NAMES. Each user is committed individually.
# NOTE(review): the print statement at the end is Python 2 syntax.
for j in "abcdefghijklmnoprst":
    for i, name in enumerate(NAMES):
        # NOTE(review): the literal below contains no % conversion, so the
        # "%" operator raises TypeError ("not all arguments converted").
        # It looks like the original address suffix was redacted -- restore
        # it before running.
        email = name.lower().replace(" ", ".") + ("*****@*****.**" % (j))
        # Faces are paired with names by position, so FACES must be at least
        # as long as NAMES.
        face = FACES[i]
        user = User(email=email,
                    active=True,
                    display_name=name,
                    photo_url=face)
        profile = Profile(external_key=random_address_hash(), data_json="{}")
        user.profiles.append(profile)
        db.session.add(user)
        db.session.commit()
        print "Added %s" % (name)
示例#18
0
def profile_with_max_followers(limit=20):
  """Print and chart statistics for the most followed profiles.

  Prints one line per profile followed by totals and averages, then saves
  two charts: most_followed_users.png (followers per profile) and
  user_metric.png (followers/following and followers/answers ratios for the
  profiles that pass the filter below).

  Args:
    limit: Maximum number of profiles to include (default 20).
  """
  def ratio(numerator, denominator):
    # Same "/" as before, but 0 instead of ZeroDivisionError.
    return numerator / denominator if denominator else 0

  # Materialise once: the rows are iterated and counted several times.
  profiles = list(
    Profile.select().order_by(Profile.followers.desc()).limit(limit))

  # Top Writer
  with open('top_writers_2016.json', 'r') as fstream:
    tw = set(writer['uname'] for writer in json.load(fstream))
  top_writer_count = 0

  print('Top %d Most followed - ' % limit)
  if not profiles:
    # Nothing to average or chart.
    print("No profiles found")
    return

  total_views = 0
  total_followers = 0
  total_following = 0
  total_answers = 0
  for profile in profiles:
    u = PROFILE_URL.format(profile.uname)
    print(u'{0} ({1} followers)'.format(u, profile.followers))
    # Each profile is counted exactly once. The earlier loop added the views
    # twice per profile, doubling the reported total, the average and the
    # views-per-follower figure.
    total_views += profile.views
    total_followers += profile.followers
    total_following += profile.following
    total_answers += profile.total_answers
    if profile.uname in tw:
      top_writer_count += 1

  avg_followers = total_followers / len(profiles)
  avg_following = total_following / len(profiles)
  avg_answers = total_answers / len(profiles)
  avg_views = total_views / len(profiles)
  avg_v_gain_f = ratio(total_views, total_followers)
  avg_f_gain_ans = ratio(total_followers, total_answers)

  print("Total number of all-time views is %d" % total_views)
  print("Average number of all-time views is %d" % avg_views)
  print("Average number of followers is %d" % avg_followers)
  print("Average number of views gained per follower is %d" % avg_v_gain_f)
  print("Total number of answers written by them is %d" % total_answers)
  print("Average number of answers written by each is %d" % avg_answers)
  print("Average number of people these writers follow is %d" % avg_following)
  print("Average number of followers gained per answer is %d" % avg_f_gain_ans)
  print("Number on Top Writers 2016 is %d" % top_writer_count)

  fig = plt.figure(figsize=(21, 14))
  plt.title("Users with most followers on Quora")
  ydata = [prof.followers for prof in profiles]
  xticks = [prof.name for prof in profiles]
  plt.xlabel('Profile')
  plt.ylabel('Number of followers')
  plt.bar(range(len(profiles)), ydata, align='center')
  plt.xticks(range(len(profiles)), xticks, rotation=90)
  plt.hlines(avg_followers, -0.5, len(profiles) - 0.5, label='Average',
             colors='red')
  plt.xlim([-0.5, len(profiles) - 0.5])
  fig.tight_layout()
  plt.savefig('most_followed_users.png', facecolor='white', edgecolor='black')

  # Ratios are charted only for profiles with more than 10 answers, more
  # than 10 followed accounts and a followers/following quotient under 4000.
  ff_ratio = []
  xticks = []
  fa_ratio = []
  for p in profiles:
    if p.total_answers > 10 and p.following > 10 and \
      p.followers / p.following < 4000:
      ff_ratio.append(float(p.followers) / p.following)
      fa_ratio.append(float(p.followers) / p.total_answers)
      xticks.append(p.name)

  print(len(ff_ratio))
  print(len(fa_ratio))
  if not ff_ratio:
    # No profile passed the filter: the averages would divide by zero and
    # the legend would have no bars to reference.
    return
  avg_ff = float(sum(ff_ratio)) / len(ff_ratio)
  avg_fa = float(sum(fa_ratio)) / len(fa_ratio)
  xpos_ff = range(len(ff_ratio))
  # The second series and the tick labels sit 0.4 to the right of the first.
  xpos_fa = [x + 0.4 for x in range(len(fa_ratio))]
  tick_pos = [x + 0.4 for x in range(len(fa_ratio))]

  fig = plt.figure(figsize=(21, 14))
  plt.title("Metrics of Users with most followers on Quora")
  bar1 = plt.bar(xpos_ff, ff_ratio, 0.35, color='red')
  bar2 = plt.bar(xpos_fa, fa_ratio, 0.35, color='blue')
  l1 = plt.hlines(avg_ff, -0.5, len(ff_ratio), linestyles='dashed',
                  color='red', label='Average Followers / Following ratio')
  l2 = plt.hlines(avg_fa, -0.5, len(ff_ratio), linestyles='dashed',
                  color='blue', label='Average Followers / Answers ratio')
  plt.xlabel('Users')
  plt.ylabel('Ratio Value')
  plt.xticks(tick_pos, xticks, rotation=90)
  plt.legend((bar1[0], bar2[0], l1, l2),
             ("Follower / Following Ratio",
              "Follower / Answer Ratio",
              "Average Followers / Following ratio",
              "Average Followers / Answers ratio"))
  plt.xlim([-0.5, len(ff_ratio)])
  fig.tight_layout()
  plt.savefig('user_metric.png', facecolor='white', edgecolor='black')