def create_question(question):
    """Store *question* unless it is falsy or its question_id is already taken.

    On success ``question.terms`` is set to the terms extracted from the
    title, extended with whatever ``generate_local_terms`` returns for those
    terms and ``question.place_ids``. If any terms result, a ``{term: 1}``
    mapping is handed to ``update_termstats`` before the entity is written
    with ``db.put``.

    Returns:
        True if the question was stored, False if it was rejected.
    """
    if not question:
        return False

    # Keys-only query: we only need to know whether a match exists.
    duplicate_check = Question.all(keys_only=True)
    duplicate_check.filter("question_id =", question.question_id)
    if duplicate_check.get():
        return False

    logging.info("Create a new question")

    question.terms = extract_terms(question.title)
    local_terms = generate_local_terms(question.terms, question.place_ids)
    question.terms += local_terms

    if question.terms:
        # One count per distinct term; duplicates collapse into a single key.
        update_termstats(dict.fromkeys(question.terms, 1))

    db.put(question)
    return True
# Only the _MAX_FILTER_TERMS rarest terms are considered for the filter.
_MAX_FILTER_TERMS = 5
# Beyond the single rarest term, a term must occur in at most this many
# documents to be used in the filter.
_MAX_FILTER_DOCFREQ = 20
# How many candidates to fetch per requested result before ranking.
_CANDIDATES_PER_RESULT = 10


def _select_best_terms(term_dict):
    """Return the filter terms chosen from a {term: docfreq} mapping.

    Terms are ranked by ascending document frequency. The rarest term is
    always kept; the following ones (up to rank ``_MAX_FILTER_TERMS``) are
    kept only if their frequency does not exceed ``_MAX_FILTER_DOCFREQ``.
    """
    ranked = sorted(term_dict, key=term_dict.get)
    extra = [
        term
        for term in ranked[1:_MAX_FILTER_TERMS]
        if term_dict[term] <= _MAX_FILTER_DOCFREQ
    ]
    return ranked[:1] + extra


def find_relevant_questions(query, place_ids=None, max_num=10):
    """Return up to *max_num* questions matching *query*, best score first.

    Args:
        query: Search string. ``None`` yields an empty result instead of
            raising ``AttributeError``.
        place_ids: Passed to ``generate_local_terms`` together with the
            extracted query terms. Defaults to a fresh empty list on each
            call, so no list is shared between calls.
        max_num: Maximum number of questions to return. Ten times as many
            candidates are fetched and then ranked.

    Returns:
        A list of questions sorted by ``question_score`` in descending
        order, or an empty list when there is nothing to search for.
    """
    if query is None:
        return []
    if place_ids is None:
        place_ids = []

    query_terms = extract_terms(query.strip())
    query_terms += generate_local_terms(query_terms, place_ids)
    if not query_terms:
        return []

    # NOTE: an earlier approach matched every term with an AND-ed GQL query
    # over the ``terms`` list property (merge-join), e.g.
    #   SELECT * FROM Question WHERE terms = 'a' AND terms = 'b'
    #   ORDER BY create_time DESC LIMIT 200
    # but that leads to the exploding-index problem as soon as there are two
    # or more query terms, hence the single "terms IN" filter used below.

    termstats = TermStat.get_by_key_name(query_terms)
    # Entries may be falsy (presumably None for terms without stored stats);
    # those are skipped.
    term_dict = dict(
        (termstat.key().name(), termstat.docfreq)
        for termstat in termstats
        if termstat
    )

    best_terms = _select_best_terms(term_dict)
    if not best_terms:
        return []

    question_query = Question.all()
    question_query.filter("terms IN", best_terms)
    question_query.order("-create_time")
    # Over-fetch so the ranking step has candidates to choose from.
    questions = question_query.fetch(max_num * _CANDIDATES_PER_RESULT)
    if questions:
        # list.sort is stable (also with reverse=True), so equally scored
        # questions keep the newest-first order from the query.
        questions.sort(
            key=lambda question: question_score(question, term_dict),
            reverse=True,
        )
    return questions[:max_num]