示例#1
0
def usersearch(query):
  """Search the user index and return the matching users as a JSON string.

  The query is parsed with the Lucene QueryParser against the "user" field
  (whitespace analyzer) and at most 50 hits are fetched from the module-level
  ``user_searcher_``.

  Each hit becomes a dict with the keys "user" and "locations" (the stored
  "locs" field). When UserProfiles returns a profile for the user, a
  "profile" dict is added with:
    hl   - profile location ("" when the profile has no location)
    sts  - {"text", "created_at"} of the status, only if a status is present
    foc  - followers_count
    frc  - friends_count
    sc   - statuses_count
    name, url, des (description)

  Returns:
    The cjson-encoded list of hits. NOTE: when ``query`` is None an empty
    dict (not a JSON string) is returned; kept as-is for existing callers.
  """
  global user_searcher_

  if query is None:
    return {}

  # Parse the query using the Lucene parser and fetch the matching docs.
  analyzer = WhitespaceAnalyzer(Version.LUCENE_CURRENT)
  p_query = QueryParser(Version.LUCENE_CURRENT, "user", analyzer).parse(query)
  print(p_query)
  scoreDocs = user_searcher_.search(p_query, 50).scoreDocs
  print("%s total matching documents." % len(scoreDocs))

  docs = []
  for scoreDoc in scoreDocs:
    doc = user_searcher_.doc(scoreDoc.doc)
    user = doc.get("user")
    entry = {"user": user, "locations": doc.get("locs")}

    profile = UserProfiles.get_user_profile_(user)
    if profile:
      info = profile[user]
      # A missing location must not drop the user from the results: previously
      # the whole entry was skipped when the profile had no location. "hl" is
      # always present so consumers can rely on the key.
      p = {"hl": ""}
      if info["location"] is not None:
        p["hl"] = info["location"]
      if "status" in info:
        p["sts"] = {
          "text": info["status"]["text"],
          "created_at": info["status"]["created_at"],
        }
      p["foc"] = info["followers_count"]
      p["frc"] = info["friends_count"]
      p["sc"] = info["statuses_count"]
      p["name"] = info["name"]
      p["url"] = info["url"]
      p["des"] = info["description"]
      entry["profile"] = p

    docs.append(entry)
  return cjson.encode(docs)
示例#2
0
def update_profile_information(users, docs, locations,
                               query_location, query_terms,
                               reverse_geocode=True):
  """Attach profile data to each result doc and drop docs that do not qualify.

  Profiles are fetched once via ``UserProfiles.get_user_profile_info(users)``.
  Every doc in ``docs`` is then processed in order:

    * If the doc's user is not a profile key but equals the lowercased form of
      one, the doc's "user" is rewritten to the profile's spelling. If no
      profile matches at all, the user name is appended to "no_profiles.txt"
      (once per name) and the doc is dropped.
    * Docs for "foursquare" (treated as spam) and docs whose user was already
      seen are dropped.
    * ``rd["details"]["h"]`` is set from the profile location: the "dwt" of the
      first key of ``locations`` contained in the lowercased location,
      otherwise the "dwt" returned by ``get_weighted_distance_location`` for
      the profile coordinates and ``query_location`` (0 when neither applies).
    * ``rd["profile"]`` is filled with hl, sts, cl, foc, frc, sc, name, url,
      des and pic, as available.
    * ``rd["details"]["term_des_count"]`` counts the query terms found in the
      lowercased, JSON-encoded description. Docs with no tweets and a count
      of 0 are dropped.

  Args:
    users: passed through to ``UserProfiles.get_user_profile_info``.
    docs: list of dicts with at least "user", "details" (a dict) and "tweets".
      The list is modified in place.
    locations: mapping of location substring -> dict containing "dwt".
    query_location: None, or a sequence whose element [1] has "lat" and "lng".
    query_terms: iterable of strings to look for in the profile description.
    reverse_geocode: when true, geo-tagged statuses are reverse geocoded via
      ``Geocoder.reverse_geocode``.

  Returns:
    ``docs`` (the same list object) containing only the docs that were kept.
  """
  profiles = UserProfiles.get_user_profile_info(users)

  # Sometimes a tweet refers to a user with the wrong capitalisation. Map the
  # lowercased profile keys back to their real spelling; the first key wins,
  # matching the order in which the keys are iterated.
  canonical_names = {}
  for name in profiles.keys():
    canonical_names.setdefault(name.lower(), name)

  unique_users = set()
  kept = []
  with open("no_profiles.txt", "a+") as f:
    # The initial read position of an "a+" stream is platform dependent, so
    # rewind explicitly before reading the names that are already recorded.
    f.seek(0)
    no_profile_accounts = set(x.strip() for x in f.readlines())
    # Reposition at the end before switching from reading to writing.
    f.seek(0, 2)

    for rd in docs[:]:
      user = rd["user"]
      if user not in profiles:
        canonical = canonical_names.get(user)
        if canonical is None:
          # No profile at all: remember the account and drop the doc. We could
          # augment the tweets instead, but few people should be making this
          # mistake so not many endorsements are lost.
          if user not in no_profile_accounts:
            f.write(user + "\n")
            no_profile_accounts.add(user)
          continue
        # We got the profile from twitter; correct the username on the doc.
        user = canonical
        rd["user"] = user

      # foursquare is considered a spam account; the name correction above
      # may also have introduced duplicates, so those are removed as well.
      if user == "foursquare" or user in unique_users:
        continue
      unique_users.add(user)

      profile = profiles[user]

      # NOTE: 'last_online' is deliberately not used for ranking yet because
      # of the slowdown of the real time twitter call it depends on.

      rd["details"]["h"] = 0
      rd["profile"] = {"hl": ""}
      location = profile["location"]
      if location is not None:
        rd["profile"]["hl"] = cjson.encode(location)
        loc = location.lower()
        for l in locations:
          if l in loc:
            rd["details"]["h"] = locations[l]["dwt"]
            break
        if rd["details"]["h"] == 0:
          if "location_coords" in profile:
            coords = profile["location_coords"]
          else:
            # Not invoking the geocoder (second argument False) because of
            # query limits.
            coords = LI.get(loc, False)
          # coords is expected to be a (lat, lng) pair; the None guard only
          # protects against a lookup that returns nothing at all.
          if (coords is not None and coords[0] is not None
              and coords[1] is not None and query_location is not None):
            wdl = get_weighted_distance_location(query_location[1]["lat"],
                                                 query_location[1]["lng"],
                                                 coords[0],
                                                 coords[1])
            if wdl:
              rd["details"]["h"] = wdl["dwt"]

      if "status" in profile:
        status = profile["status"]
        rd["profile"]["sts"] = {}
        if "geo" in status and status["geo"] is not None and reverse_geocode:
          geo_field = status["geo"]
          try:
            geo_results = Geocoder.reverse_geocode(
                float(geo_field["coordinates"][0]),
                float(geo_field["coordinates"][1]))
            if geo_results:
              place = str(geo_results[0])
              rd["profile"]["cl"] = place
              rd["profile"]["sts"]["geo"] = place
          except GeocoderError as e:
            # Best effort: a failed reverse geocode only loses the place name.
            print(str(e))
        rd["profile"]["sts"]["text"] = json.dumps(status["text"])
        rd["profile"]["sts"]["created_at"] = json.dumps(status["created_at"])
      rd["profile"]["foc"] = profile["followers_count"]
      rd["profile"]["frc"] = profile["friends_count"]
      rd["profile"]["sc"] = profile["statuses_count"]
      rd["profile"]["name"] = profile["name"]
      rd["profile"]["url"] = json.dumps(profile["url"])
      rd["profile"]["des"] = json.dumps(profile["description"])

      # Count the query terms that occur in the (JSON-encoded) description.
      description = rd["profile"]["des"].lower()
      rd["details"]["term_des_count"] = sum(
          1 for term in query_terms if term in description)

      # Nothing ties this user to the query: no tweets and no term match.
      if not rd['tweets'] and rd["details"]["term_des_count"] == 0:
        continue

      if "profile_image_url" in profile:
        rd["profile"]["pic"] = json.dumps(profile["profile_image_url"])
      else:
        url = UserProfiles.get_profile_image_url(user)
        if url:
          rd["profile"]["pic"] = url

      kept.append(rd)

  # Update the caller's list in place, as before, without the quadratic
  # remove-while-iterating pattern.
  docs[:] = kept
  return docs