def filter_words_by_property(hits, path, q, db, config, word_filter=True, filter_num=100, stopwords=True):
    concordance_object = {"query": dict([i for i in q])}

    length = config['concordance_length']

    # Do these need to be captured in wsgi_handler?
    word_property = q["word_property"]
    word_property_value = q["word_property_value"]
    word_property_total = q["word_property_total"]
    
    new_hitlist = []
    results = []
    position = 0
    more_pages = False
    
    if q.start == 0:
        start = 1
    else:
        start = q.start

    for hit in hits:
        ## get my chunk of text ##
        hit_val = get_word_attrib(hit,word_property,db)

        if hit_val == word_property_value:
            position += 1
            if position < start:
                continue
            new_hitlist.append(hit)
            citation_hrefs = citation_links(db, config, hit)
            metadata_fields = {}
            for metadata in db.locals['metadata_fields']:
                metadata_fields[metadata] = hit[metadata]
            citation = concordance_citation(hit, citation_hrefs)
            context = fetch_concordance(db, hit, config.db_path, config.concordance_length)
            result_obj = {"philo_id": hit.philo_id, "citation": citation, "citation_links": citation_hrefs, "context": context,
                          "metadata_fields": metadata_fields, "bytes": hit.bytes, "collocate_count": 1}            
            results.append(result_obj)

        if len(new_hitlist) == (q.results_per_page):
            more_pages = True
            break
    
    end = start + len(results) - 1 
    if len(results) < q.results_per_page:
        word_property_total = end
    else:
        word_property_total = end + 1
    concordance_object['results'] = results
    concordance_object["query_done"] = hits.done
    concordance_object['results_length'] = word_property_total
    concordance_object["description"] = {"start": start, "end": end, "results_per_page": q.results_per_page, "more_pages": more_pages}    
    print >> sys.stderr, "DONE"
    return concordance_object
示例#2
0
def generate_kwic_results(db, q, config, link_to_hit="div1"):
    """ The link_to_hit keyword defines the text object to which the metadata link leads to"""
    hits = db.query(q["q"],q["method"],q["arg"],**q.metadata)
    start, end, n = f.link.page_interval(q.results_per_page, hits, q.start, q.end)
    kwic_object = {"description": {"start": start, "end": end, "results_per_page": q.results_per_page},
                    "query": dict([i for i in q])}
    kwic_results = []
    
    length = config.concordance_length
    
    for hit in hits[start - 1:end]:
        # Get all metadata
        metadata_fields = {}
        for metadata in db.locals['metadata_fields']:
            metadata_fields[metadata] = hit[metadata].strip()
        
        ## Get all links and citations
        citation_hrefs = citation_links(db, config, hit)
        citation = concordance_citation(hit, citation_hrefs)
            
        ## Determine length of text needed
        byte_distance = hit.bytes[-1] - hit.bytes[0]
        length = config.concordance_length + byte_distance + config.concordance_length
            
        ## Get concordance and align it
        bytes, byte_start = adjust_bytes(hit.bytes, config.concordance_length)
        conc_text = f.get_text(hit, byte_start, length, config.db_path)
        conc_text = format_strip(conc_text, bytes)
        conc_text = KWIC_formatter(conc_text, len(hit.bytes))

        kwic_result = {"philo_id": hit.philo_id, "context": conc_text, "metadata_fields": metadata_fields,
                       "citation_links": citation_hrefs, "citation": citation, "bytes": hit.bytes}
        kwic_results.append(kwic_result)
    kwic_object['results'] = kwic_results
    kwic_object['results_length'] = len(hits)
    kwic_object["query_done"] = hits.done
    
    return kwic_object