def usersearch(query):
    """Search the user index and return the matching users as JSON.

    ``query`` is parsed with Lucene's ``QueryParser`` against the "user"
    field using a whitespace analyzer, and the module-level
    ``user_searcher_`` is asked for the top 50 hits.  Every hit becomes a
    dict with "user" and "locations" keys; when
    ``UserProfiles.get_user_profile_`` returns something truthy for that
    user, a compact "profile" dict (short keys such as "hl", "sts", "foc")
    is attached as well.

    Returns an empty dict -- not a JSON string -- when ``query`` is None,
    otherwise the ``cjson``-encoded list of hit dicts.
    """
    global user_searcher_
    analyzer = WhitespaceAnalyzer(Version.LUCENE_CURRENT)
    if query is None:
        return {}

    # Parse the query with the Lucene parser and fetch the matching docs.
    parser = QueryParser(Version.LUCENE_CURRENT, "user", analyzer)
    parsed = parser.parse(query)
    print(parsed)
    hits = user_searcher_.search(parsed, 50).scoreDocs
    print("%s total matching documents." % len(hits))

    results = []
    for hit in hits:
        stored = user_searcher_.doc(hit.doc)
        user = stored.get("user")
        lookup = UserProfiles.get_user_profile_(user)

        summary = None
        if lookup:
            info = lookup[user]
            summary = {}
            if info["location"] is not None:
                summary["hl"] = info["location"]
            if "status" in info:
                status = info["status"]
                summary["sts"] = {
                    "text": status["text"],
                    "created_at": status["created_at"],
                }
            summary["foc"] = info["followers_count"]
            summary["frc"] = info["friends_count"]
            summary["sc"] = info["statuses_count"]
            summary["name"] = info["name"]
            summary["url"] = info["url"]
            summary["des"] = info["description"]

        entry = {"user": stored.get("user"), "locations": stored.get("locs")}
        if summary is not None:
            # Only hits with a known profile carry the "profile" key.
            entry["profile"] = summary
        results.append(entry)

    return cjson.encode(results)
def _resolve_profile_username(username, profiles):
    """Return the key of ``profiles`` that corresponds to ``username``.

    An exact key match wins.  Otherwise the first key whose lower-cased
    form equals ``username`` is returned, which corrects usernames that
    were written with the wrong capitalisation.  Returns None when no key
    matches.
    """
    if username in profiles:
        return username
    for candidate in profiles.keys():
        if username == candidate.lower():
            return candidate
    return None


def _home_location_weight(profile, locations, query_location):
    """Return the distance weight for the profile's home location.

    ``profile["location"]`` must not be None.  The lower-cased location is
    first matched by substring against the keys of ``locations``; if that
    yields no (or a zero) weight, coordinates are taken from
    ``profile["location_coords"]`` or from ``LI.get`` and weighted against
    ``query_location`` with ``get_weighted_distance_location``.  Returns 0
    when no weight can be determined.
    """
    loc = profile["location"].lower()
    weight = 0
    for name in locations:
        if name in loc:
            weight = locations[name]["dwt"]
            break
    if weight != 0:
        return weight

    if "location_coords" in profile:
        coords = profile["location_coords"]
    else:
        # Second argument False: not invoking the geocoder because of
        # query limits.
        coords = LI.get(loc, False)

    # NOTE(review): the guard against ``coords`` itself being None is
    # defensive; confirm what LI.get returns for an unknown location.
    if (coords is not None and coords[0] is not None
            and coords[1] is not None and query_location is not None):
        wdl = get_weighted_distance_location(
            query_location[1]["lat"], query_location[1]["lng"],
            coords[0], coords[1])
        if wdl:
            return wdl["dwt"]
    return 0


def update_profile_information(users, docs, locations, query_location,
                               query_terms, reverse_geocode=True):
    """Attach profile data to each result doc and drop unusable docs.

    Profiles for ``users`` are fetched through
    ``UserProfiles.get_user_profile_info``.  ``docs`` is filtered in place
    and also returned.  A doc is dropped when:

    * no profile matches its "user" (the name is then appended to
      ``no_profiles.txt`` unless it is already listed there),
    * its user is "foursquare" (treated as a spam account) or was already
      seen earlier in ``docs``,
    * it has no tweets and none of ``query_terms`` occurs in the JSON
      encoded profile description.

    Every surviving doc gets ``rd["details"]["h"]`` (home-location weight),
    ``rd["details"]["term_des_count"]`` and a ``rd["profile"]`` dict with
    the short keys "hl", "sts", "cl", "foc", "frc", "sc", "name", "url",
    "des" and "pic" (some only when the data is available).

    Parameters:
        users: passed unchanged to ``UserProfiles.get_user_profile_info``.
        docs: list of result dicts; each needs "user", "details" and
            "tweets" keys.
        locations: mapping of location name to a dict holding "dwt".
        query_location: None, or an indexable whose element 1 holds "lat"
            and "lng".
        query_terms: iterable of strings matched against the description.
        reverse_geocode: when true, the geo tag of the latest status is
            reverse geocoded through ``Geocoder.reverse_geocode``.

    Returns:
        The same ``docs`` list, filtered and augmented.
    """
    unique_users = set()
    kept = []

    # The context manager guarantees the file is closed even when a lookup
    # below raises.
    with open("no_profiles.txt", "a+") as no_profile_file:
        # The initial read position of an "a+" stream is platform
        # dependent (start of file on glibc, end of file elsewhere), so
        # rewind explicitly before reading the known accounts.
        no_profile_file.seek(0)
        no_profile_accounts = set(
            line.strip() for line in no_profile_file.readlines())
        # Reposition at the end (whence=2) before switching from reading
        # to writing on the same stream.
        no_profile_file.seek(0, 2)

        profiles = UserProfiles.get_user_profile_info(users)
        #profiles = OnlineUser.get_location_time_info(users)

        for rd in docs:
            # Sometimes a user is referred to with a mis-capitalised
            # username.  We get the profile from twitter but need to make
            # sure that the username is corrected.
            username = _resolve_profile_username(rd["user"], profiles)
            if username is None:
                if rd["user"] not in no_profile_accounts:
                    no_profile_file.write(rd["user"] + "\n")
                    # Remember the name so a repeat within this call is
                    # not written a second time.
                    no_profile_accounts.add(rd["user"])
                # We could augment the tweets in case the user already
                # exists, but not many people should be making mistakes,
                # so we won't miss out on many endorsements.
                continue
            rd["user"] = username

            # foursquare is removed as we can consider it a spam account;
            # the username correction above might introduce duplicates,
            # so those are removed as well.
            if username == "foursquare" or username in unique_users:
                continue
            unique_users.add(username)

            # NOTE: last-online time ('last_online' ->
            # rd['details']['lot'], status 'created_at' ->
            # rd['details']['t']) is deliberately not attached because of
            # the real-time twitter call slowdown.  Some profiles might
            # have it, but it is not part of ranking yet.

            profile = profiles[username]
            details = rd["details"]
            details["h"] = 0
            summary = {"hl": ""}
            rd["profile"] = summary

            if profile["location"] is not None:
                summary["hl"] = cjson.encode(profile["location"])
                details["h"] = _home_location_weight(
                    profile, locations, query_location)

            if "status" in profile:
                status = profile["status"]
                status_summary = {}
                summary["sts"] = status_summary
                if "geo" in status and status["geo"] is not None:
                    geo_field = status["geo"]
                    if reverse_geocode:
                        try:
                            geo_results = Geocoder.reverse_geocode(
                                float(geo_field["coordinates"][0]),
                                float(geo_field["coordinates"][1]))
                        except GeocoderError as e:
                            # Best effort: a failed lookup only means the
                            # current location stays unset.
                            print(str(e))
                        else:
                            if geo_results:
                                place = str(geo_results[0])
                                summary["cl"] = place
                                status_summary["geo"] = place
                status_summary["text"] = json.dumps(status["text"])
                status_summary["created_at"] = json.dumps(
                    status["created_at"])

            summary["foc"] = profile["followers_count"]
            summary["frc"] = profile["friends_count"]
            summary["sc"] = profile["statuses_count"]
            summary["name"] = profile["name"]
            summary["url"] = json.dumps(profile["url"])
            summary["des"] = json.dumps(profile["description"])

            # Count how many query terms occur in the (JSON encoded)
            # description; lower-case it once instead of once per term.
            description = summary["des"].lower()
            details["term_des_count"] = sum(
                1 for term in query_terms if term in description)

            # Nothing ties this user to the query: no tweets and no
            # matching description term.
            if not rd['tweets'] and details["term_des_count"] == 0:
                continue

            if "profile_image_url" in profile:
                summary["pic"] = json.dumps(profile["profile_image_url"])
            else:
                url = UserProfiles.get_profile_image_url(username)
                if url:
                    summary["pic"] = url

            kept.append(rd)

    # Filter in place so callers holding a reference to ``docs`` see the
    # same result as the returned list.
    docs[:] = kept
    return docs