Пример #1
0
def parse_query(raw_query):
    """Parse a user-entered query into a list of unique canonical terms.

    in: a query as entered by the user
    out: a list of canonical terms from the query, in order of first
         appearance, with duplicates and empty terms removed
    """
    logging.debug("Parsing query")
    query = []
    # split() without an argument splits on any whitespace run, so leading,
    # trailing or repeated spaces no longer produce empty raw terms.
    for raw_term in raw_query.split():
        term = cleanup_term(raw_term)
        # A term that cleans up to nothing carries no search information.
        if not term:
            continue
        if term not in query:
            query.append(term)

    return query
Пример #2
0
def build_index(index, doclist):
    """Add the terms of every document in doclist to index.

    in: existing index, list of document names to add to the index
    out: updated index, a dictionary whose keys are cleaned-up terms
         and whose values are a list of documents containing the term

    The index is updated in place. Terms that clean up to an empty value
    are skipped rather than being stored under an empty key.
    NOTE(review): no return statement is visible in this span -- confirm
    whether callers rely on the in-place update or on a returned value.
    """
    logging.debug("Building index")
    for docname in doclist:
        doctext = load_document(docname)
        for raw_term in split_document(doctext):
            term = cleanup_term(raw_term)
            if not term:
                # Previously an empty term fell through to the "new entry"
                # branch and was indexed (and overwritten) under its key.
                continue
            entry = index.get(term)
            if not entry:
                # Unknown term (or an empty entry): start a fresh list.
                index[term] = [docname]
            elif docname not in entry:
                # Known term: record each document only once.
                entry.append(docname)